@mastra/mcp-docs-server 1.1.29-alpha.9 → 1.1.30-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.docs/docs/agents/background-tasks.md +242 -0
  2. package/.docs/docs/agents/supervisor-agents.md +35 -4
  3. package/.docs/docs/agents/using-tools.md +1 -0
  4. package/.docs/docs/observability/metrics/overview.md +2 -2
  5. package/.docs/docs/observability/tracing/exporters/default.md +3 -3
  6. package/.docs/docs/observability/tracing/exporters/laminar.md +22 -14
  7. package/.docs/docs/streaming/background-task-streaming.md +80 -0
  8. package/.docs/docs/streaming/overview.md +3 -0
  9. package/.docs/docs/workspace/filesystem.md +1 -1
  10. package/.docs/guides/build-your-ui/assistant-ui.md +1 -1
  11. package/.docs/models/gateways/openrouter.md +3 -1
  12. package/.docs/models/index.md +1 -1
  13. package/.docs/models/providers/anthropic.md +4 -2
  14. package/.docs/models/providers/baseten.md +2 -1
  15. package/.docs/models/providers/deepinfra.md +2 -1
  16. package/.docs/models/providers/fireworks-ai.md +2 -1
  17. package/.docs/models/providers/kilo.md +3 -1
  18. package/.docs/models/providers/nvidia.md +2 -1
  19. package/.docs/models/providers/openai.md +2 -1
  20. package/.docs/models/providers/wandb.md +3 -2
  21. package/.docs/models/providers/zai-coding-plan.md +9 -8
  22. package/.docs/models/providers/zenmux.md +8 -1
  23. package/.docs/reference/client-js/agents.md +24 -0
  24. package/.docs/reference/configuration.md +63 -0
  25. package/.docs/reference/harness/harness-class.md +53 -10
  26. package/.docs/reference/index.md +3 -0
  27. package/.docs/reference/observability/metrics/automatic-metrics.md +2 -2
  28. package/.docs/reference/observability/tracing/interfaces.md +17 -0
  29. package/.docs/reference/processors/stream-error-retry-processor.md +54 -0
  30. package/.docs/reference/storage/clickhouse.md +274 -0
  31. package/.docs/reference/storage/composite.md +5 -3
  32. package/.docs/reference/streaming/ChunkType.md +140 -0
  33. package/.docs/reference/streaming/agents/streamUntilIdle.md +94 -0
  34. package/.docs/reference/workspace/s3-filesystem.md +79 -5
  35. package/CHANGELOG.md +37 -0
  36. package/package.json +6 -6
@@ -1,6 +1,6 @@
1
1
  # ![Deep Infra logo](https://models.dev/logos/deepinfra.svg)Deep Infra
2
2
 
3
- Access 32 Deep Infra models through Mastra's model router. Authentication is handled automatically using the `DEEPINFRA_API_KEY` environment variable.
3
+ Access 33 Deep Infra models through Mastra's model router. Authentication is handled automatically using the `DEEPINFRA_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Deep Infra documentation](https://deepinfra.com/models).
6
6
 
@@ -36,6 +36,7 @@ for await (const chunk of stream) {
36
36
  | `deepinfra/anthropic/claude-4-opus` | 200K | | | | | | $17 | $83 |
37
37
  | `deepinfra/deepseek-ai/DeepSeek-R1-0528` | 164K | | | | | | $0.50 | $2 |
38
38
  | `deepinfra/deepseek-ai/DeepSeek-V3.2` | 164K | | | | | | $0.26 | $0.38 |
39
+ | `deepinfra/deepseek-ai/DeepSeek-V4-Pro` | 66K | | | | | | $2 | $3 |
39
40
  | `deepinfra/meta-llama/Llama-3.1-70B-Instruct` | 131K | | | | | | $0.40 | $0.40 |
40
41
  | `deepinfra/meta-llama/Llama-3.1-70B-Instruct-Turbo` | 131K | | | | | | $0.40 | $0.40 |
41
42
  | `deepinfra/meta-llama/Llama-3.1-8B-Instruct` | 131K | | | | | | $0.02 | $0.05 |
@@ -1,6 +1,6 @@
1
1
  # ![Fireworks AI logo](https://models.dev/logos/fireworks-ai.svg)Fireworks AI
2
2
 
3
- Access 18 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
3
+ Access 19 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Fireworks AI documentation](https://fireworks.ai/docs/).
6
6
 
@@ -36,6 +36,7 @@ for await (const chunk of stream) {
36
36
  | --------------------------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
37
  | `fireworks-ai/accounts/fireworks/models/deepseek-v3p1` | 164K | | | | | | $0.56 | $2 |
38
38
  | `fireworks-ai/accounts/fireworks/models/deepseek-v3p2` | 160K | | | | | | $0.56 | $2 |
39
+ | `fireworks-ai/accounts/fireworks/models/deepseek-v4-pro` | 1.0M | | | | | | $2 | $3 |
39
40
  | `fireworks-ai/accounts/fireworks/models/glm-4p5` | 131K | | | | | | $0.55 | $2 |
40
41
  | `fireworks-ai/accounts/fireworks/models/glm-4p5-air` | 131K | | | | | | $0.22 | $0.88 |
41
42
  | `fireworks-ai/accounts/fireworks/models/glm-4p7` | 198K | | | | | | $0.60 | $2 |
@@ -1,6 +1,6 @@
1
1
  # ![Kilo Gateway logo](https://models.dev/logos/kilo.svg)Kilo Gateway
2
2
 
3
- Access 335 Kilo Gateway models through Mastra's model router. Authentication is handled automatically using the `KILO_API_KEY` environment variable.
3
+ Access 337 Kilo Gateway models through Mastra's model router. Authentication is handled automatically using the `KILO_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Kilo Gateway documentation](https://kilo.ai).
6
6
 
@@ -357,6 +357,8 @@ for await (const chunk of stream) {
357
357
  | `kilo/xiaomi/mimo-v2-flash` | 262K | | | | | | $0.09 | $0.29 |
358
358
  | `kilo/xiaomi/mimo-v2-omni` | 262K | | | | | | $0.40 | $2 |
359
359
  | `kilo/xiaomi/mimo-v2-pro` | 1.0M | | | | | | $1 | $3 |
360
+ | `kilo/xiaomi/mimo-v2.5` | 1.0M | | | | | | $0.40 | $2 |
361
+ | `kilo/xiaomi/mimo-v2.5-pro` | 1.0M | | | | | | $1 | $3 |
360
362
  | `kilo/z-ai/glm-4-32b` | 128K | | | | | | $0.10 | $0.10 |
361
363
  | `kilo/z-ai/glm-4.5` | 131K | | | | | | $0.60 | $2 |
362
364
  | `kilo/z-ai/glm-4.5-air` | 131K | | | | | | $0.13 | $0.85 |
@@ -1,6 +1,6 @@
1
1
  # ![Nvidia logo](https://models.dev/logos/nvidia.svg)Nvidia
2
2
 
3
- Access 79 Nvidia models through Mastra's model router. Authentication is handled automatically using the `NVIDIA_API_KEY` environment variable.
3
+ Access 80 Nvidia models through Mastra's model router. Authentication is handled automatically using the `NVIDIA_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Nvidia documentation](https://docs.api.nvidia.com/nim/).
6
6
 
@@ -95,6 +95,7 @@ for await (const chunk of stream) {
95
95
  | `nvidia/nvidia/llama3-chatqa-1.5-70b` | 128K | | | | | | — | — |
96
96
  | `nvidia/nvidia/nemoretriever-ocr-v1` | — | | | | | | — | — |
97
97
  | `nvidia/nvidia/nemotron-3-nano-30b-a3b` | 131K | | | | | | — | — |
98
+ | `nvidia/nvidia/nemotron-3-nano-omni-30b-a3b-reasoning` | 256K | | | | | | — | — |
98
99
  | `nvidia/nvidia/nemotron-3-super-120b-a12b` | 262K | | | | | | $0.20 | $0.80 |
99
100
  | `nvidia/nvidia/nemotron-4-340b-instruct` | 128K | | | | | | — | — |
100
101
  | `nvidia/nvidia/nvidia-nemotron-nano-9b-v2` | 131K | | | | | | — | — |
@@ -1,6 +1,6 @@
1
1
  # ![OpenAI logo](https://models.dev/logos/openai.svg)OpenAI
2
2
 
3
- Access 51 OpenAI models through Mastra's model router. Authentication is handled automatically using the `OPENAI_API_KEY` environment variable.
3
+ Access 52 OpenAI models through Mastra's model router. Authentication is handled automatically using the `OPENAI_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [OpenAI documentation](https://platform.openai.com/docs/models).
6
6
 
@@ -67,6 +67,7 @@ for await (const chunk of stream) {
67
67
  | `openai/gpt-5.4-nano` | 400K | | | | | | $0.20 | $1 |
68
68
  | `openai/gpt-5.4-pro` | 1.1M | | | | | | $30 | $180 |
69
69
  | `openai/gpt-5.5` | 1.1M | | | | | | $5 | $30 |
70
+ | `openai/gpt-5.5-pro` | 1.1M | | | | | | $30 | $180 |
70
71
  | `openai/gpt-image-1` | — | | | | | | — | — |
71
72
  | `openai/gpt-image-1-mini` | — | | | | | | — | — |
72
73
  | `openai/gpt-image-1.5` | — | | | | | | — | — |
@@ -1,6 +1,6 @@
1
1
  # ![Weights & Biases logo](https://models.dev/logos/wandb.svg)Weights & Biases
2
2
 
3
- Access 17 Weights & Biases models through Mastra's model router. Authentication is handled automatically using the `WANDB_API_KEY` environment variable.
3
+ Access 18 Weights & Biases models through Mastra's model router. Authentication is handled automatically using the `WANDB_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Weights & Biases documentation](https://docs.wandb.ai).
6
6
 
@@ -51,6 +51,7 @@ for await (const chunk of stream) {
51
51
  | `wandb/Qwen/Qwen3-30B-A3B-Instruct-2507` | 262K | | | | | | $0.10 | $0.30 |
52
52
  | `wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct` | 262K | | | | | | $1 | $2 |
53
53
  | `wandb/zai-org/GLM-5-FP8` | 200K | | | | | | $1 | $3 |
54
+ | `wandb/zai-org/GLM-5.1` | 200K | | | | | | $1 | $4 |
54
55
 
55
56
  ## Advanced configuration
56
57
 
@@ -80,7 +81,7 @@ const agent = new Agent({
80
81
  model: ({ requestContext }) => {
81
82
  const useAdvanced = requestContext.task === "complex";
82
83
  return useAdvanced
83
- ? "wandb/zai-org/GLM-5-FP8"
84
+ ? "wandb/zai-org/GLM-5.1"
84
85
  : "wandb/MiniMaxAI/MiniMax-M2.5";
85
86
  }
86
87
  });
@@ -1,6 +1,6 @@
1
1
  # ![Z.AI Coding Plan logo](https://models.dev/logos/zai-coding-plan.svg)Z.AI Coding Plan
2
2
 
3
- Access 4 Z.AI Coding Plan models through Mastra's model router. Authentication is handled automatically using the `ZHIPU_API_KEY` environment variable.
3
+ Access 5 Z.AI Coding Plan models through Mastra's model router. Authentication is handled automatically using the `ZHIPU_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Z.AI Coding Plan documentation](https://docs.z.ai/devpack/overview).
6
6
 
@@ -32,12 +32,13 @@ for await (const chunk of stream) {
32
32
 
33
33
  ## Models
34
34
 
35
- | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
- | ----------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
- | `zai-coding-plan/glm-4.5-air` | 131K | | | | | | — | — |
38
- | `zai-coding-plan/glm-4.7` | 205K | | | | | | — | — |
39
- | `zai-coding-plan/glm-5-turbo` | 200K | | | | | | — | — |
40
- | `zai-coding-plan/glm-5.1` | 200K | | | | | | — | — |
35
+ | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
+ | ------------------------------ | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
+ | `zai-coding-plan/glm-4.5-air` | 131K | | | | | | — | — |
38
+ | `zai-coding-plan/glm-4.7` | 205K | | | | | | — | — |
39
+ | `zai-coding-plan/glm-5-turbo` | 200K | | | | | | — | — |
40
+ | `zai-coding-plan/glm-5.1` | 200K | | | | | | — | — |
41
+ | `zai-coding-plan/glm-5v-turbo` | 200K | | | | | | — | — |
41
42
 
42
43
  ## Advanced configuration
43
44
 
@@ -67,7 +68,7 @@ const agent = new Agent({
67
68
  model: ({ requestContext }) => {
68
69
  const useAdvanced = requestContext.task === "complex";
69
70
  return useAdvanced
70
- ? "zai-coding-plan/glm-5.1"
71
+ ? "zai-coding-plan/glm-5v-turbo"
71
72
  : "zai-coding-plan/glm-4.5-air";
72
73
  }
73
74
  });
@@ -1,6 +1,6 @@
1
1
  # ![ZenMux logo](https://models.dev/logos/zenmux.svg)ZenMux
2
2
 
3
- Access 89 ZenMux models through Mastra's model router. Authentication is handled automatically using the `ZENMUX_API_KEY` environment variable.
3
+ Access 96 ZenMux models through Mastra's model router. Authentication is handled automatically using the `ZENMUX_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [ZenMux documentation](https://docs.zenmux.ai).
6
6
 
@@ -49,6 +49,8 @@ for await (const chunk of stream) {
49
49
  | `zenmux/deepseek/deepseek-chat` | 128K | | | | | | $0.28 | $0.42 |
50
50
  | `zenmux/deepseek/deepseek-v3.2` | 128K | | | | | | $0.28 | $0.43 |
51
51
  | `zenmux/deepseek/deepseek-v3.2-exp` | 163K | | | | | | $0.22 | $0.33 |
52
+ | `zenmux/deepseek/deepseek-v4-flash` | 1.0M | | | | | | $0.14 | $0.28 |
53
+ | `zenmux/deepseek/deepseek-v4-pro` | 1.0M | | | | | | $2 | $3 |
52
54
  | `zenmux/google/gemini-2.5-flash` | 1.0M | | | | | | $0.30 | $3 |
53
55
  | `zenmux/google/gemini-2.5-flash-lite` | 1.0M | | | | | | $0.10 | $0.40 |
54
56
  | `zenmux/google/gemini-2.5-pro` | 1.0M | | | | | | $1 | $10 |
@@ -84,6 +86,8 @@ for await (const chunk of stream) {
84
86
  | `zenmux/openai/gpt-5.4-mini` | 400K | | | | | | $0.75 | $5 |
85
87
  | `zenmux/openai/gpt-5.4-nano` | 400K | | | | | | $0.20 | $1 |
86
88
  | `zenmux/openai/gpt-5.4-pro` | 1.1M | | | | | | $45 | $225 |
89
+ | `zenmux/openai/gpt-5.5` | 1.1M | | | | | | $5 | $30 |
90
+ | `zenmux/openai/gpt-5.5-pro` | 1.1M | | | | | | $30 | $180 |
87
91
  | `zenmux/qwen/qwen3-coder-plus` | 1.0M | | | | | | $1 | $5 |
88
92
  | `zenmux/qwen/qwen3-max` | 256K | | | | | | $1 | $6 |
89
93
  | `zenmux/qwen/qwen3.5-flash` | 1.0M | | | | | | $0.10 | $0.40 |
@@ -94,6 +98,7 @@ for await (const chunk of stream) {
94
98
  | `zenmux/stepfun/step-3` | 66K | | | | | | $0.21 | $0.57 |
95
99
  | `zenmux/stepfun/step-3.5-flash` | 256K | | | | | | $0.10 | $0.30 |
96
100
  | `zenmux/stepfun/step-3.5-flash-free` | 256K | | | | | | — | — |
101
+ | `zenmux/tencent/hy3-preview` | 256K | | | | | | $0.17 | $0.57 |
97
102
  | `zenmux/volcengine/doubao-seed-1.8` | 256K | | | | | | $0.11 | $0.28 |
98
103
  | `zenmux/volcengine/doubao-seed-2.0-code` | 256K | | | | | | $0.90 | $4 |
99
104
  | `zenmux/volcengine/doubao-seed-2.0-lite` | 256K | | | | | | $0.09 | $0.51 |
@@ -110,6 +115,8 @@ for await (const chunk of stream) {
110
115
  | `zenmux/xiaomi/mimo-v2-flash` | 262K | | | | | | $0.10 | $0.30 |
111
116
  | `zenmux/xiaomi/mimo-v2-omni` | 265K | | | | | | $0.40 | $2 |
112
117
  | `zenmux/xiaomi/mimo-v2-pro` | 1.0M | | | | | | $2 | $5 |
118
+ | `zenmux/xiaomi/mimo-v2.5` | 1.0M | | | | | | $0.40 | $2 |
119
+ | `zenmux/xiaomi/mimo-v2.5-pro` | 1.0M | | | | | | $1 | $3 |
113
120
  | `zenmux/z-ai/glm-4.5` | 128K | | | | | | $0.35 | $2 |
114
121
  | `zenmux/z-ai/glm-4.5-air` | 128K | | | | | | $0.11 | $0.56 |
115
122
  | `zenmux/z-ai/glm-4.6` | 200K | | | | | | $0.35 | $2 |
@@ -151,6 +151,30 @@ for await (const part of uiMessageStream) {
151
151
  }
152
152
  ```
153
153
 
154
+ ### `streamUntilIdle()`
155
+
156
+ Stream a response and keep the stream open until every [background task](https://mastra.ai/docs/agents/background-tasks) dispatched during the run completes. The server re-enters the agentic loop on each task completion so the LLM can react to results in the same call. This requires background tasks to be [enabled on the Mastra instance](https://mastra.ai/reference/configuration) and a memory thread to be provided; if either is missing, the call falls through to a plain `stream()`.
157
+
158
+ ```typescript
159
+ const response = await agent.streamUntilIdle('Research solana for me', {
160
+ memory: {
161
+ thread: 'thread-1',
162
+ resource: 'resource-1',
163
+ },
164
+ maxIdleMs: 5 * 60_000,
165
+ })
166
+
167
+ response.processDataStream({
168
+ onChunk: async chunk => {
169
+ if (chunk.type === 'background-task-completed') {
170
+ console.log('task complete:', chunk.payload.taskId)
171
+ }
172
+ },
173
+ })
174
+ ```
175
+
176
+ The stream emits the same chunk types as `stream()`, plus `background-task-*` chunks for task lifecycle events. Visit [`Agent.streamUntilIdle()`](https://mastra.ai/reference/streaming/agents/streamUntilIdle) for the full server-side API and [background task chunks](https://mastra.ai/reference/streaming/ChunkType) for the payload shapes.
177
+
154
178
  ### `getTool()`
155
179
 
156
180
  Retrieve information about a specific tool available to the agent:
@@ -36,6 +36,69 @@ export const mastra = new Mastra({
36
36
  })
37
37
  ```
38
38
 
39
+ ### backgroundTasks
40
+
41
+ **Type:** `BackgroundTaskManagerConfig`
42
+
43
+ Enables and configures the background task manager. When enabled, agents can dispatch long-running tool calls (including subagent invocations) to run asynchronously while the agentic loop continues. Tasks are persisted, so a configured `storage` backend is required.
44
+
45
+ Visit the [Background tasks documentation](https://mastra.ai/docs/agents/background-tasks) to learn more.
46
+
47
+ ```typescript
48
+ import { Mastra } from '@mastra/core'
49
+ import { LibSQLStore } from '@mastra/libsql'
50
+
51
+ export const mastra = new Mastra({
52
+ storage: new LibSQLStore({
53
+ id: 'mastra-storage',
54
+ url: 'file:./mastra.db',
55
+ }),
56
+ backgroundTasks: {
57
+ enabled: true,
58
+ globalConcurrency: 10,
59
+ perAgentConcurrency: 5,
60
+ backpressure: 'queue',
61
+ defaultTimeoutMs: 300_000,
62
+ },
63
+ })
64
+ ```
65
+
66
+ **enabled** (`boolean`): Whether background tasks are enabled. The manager only initializes when this is true and a storage backend is configured. (Default: `false`)
67
+
68
+ **globalConcurrency** (`number`): Maximum number of background tasks running concurrently across all agents. (Default: `10`)
69
+
70
+ **perAgentConcurrency** (`number`): Maximum number of background tasks running concurrently for a single agent. (Default: `5`)
71
+
72
+ **backpressure** (`'queue' | 'reject' | 'fallback-sync'`): Behavior when a concurrency limit is reached. 'queue' waits for a slot, 'reject' throws on enqueue, 'fallback-sync' runs the tool synchronously in the agentic loop instead. (Default: `'queue'`)
73
+
74
+ **defaultTimeoutMs** (`number`): Default per-task timeout in milliseconds. Can be overridden per-tool or per-call. (Default: `300000`)
75
+
76
+ **defaultRetries** (`RetryConfig`): Default retry policy applied to tasks that fail.
77
+
78
+ **defaultRetries.maxRetries** (`number`): Maximum retry attempts before the task is marked failed.
79
+
80
+ **defaultRetries.retryDelayMs** (`number`): Delay between retries in milliseconds.
81
+
82
+ **defaultRetries.backoffMultiplier** (`number`): Multiplier applied to retryDelayMs on each subsequent attempt.
83
+
84
+ **defaultRetries.maxRetryDelayMs** (`number`): Upper bound on the retry delay regardless of backoff.
85
+
86
+ **defaultRetries.retryableErrors** (`(error: Error) => boolean`): Predicate that decides whether a given error should be retried. Default: retry all errors.
87
+
88
+ **cleanup** (`CleanupConfig`): Controls how long task records are kept and how often the cleanup process runs.
89
+
90
+ **cleanup.completedTtlMs** (`number`): How long to keep completed task records, in milliseconds. Default: 1 hour.
91
+
92
+ **cleanup.failedTtlMs** (`number`): How long to keep failed task records, in milliseconds. Default: 24 hours.
93
+
94
+ **cleanup.cleanupIntervalMs** (`number`): How often the cleanup process runs, in milliseconds. Default: 1 minute.
95
+
96
+ **waitTimeoutMs** (`number`): How long the agentic loop waits for a background task to complete before moving on. If a task has not finished within this time, the loop proceeds without setting isContinued. Default: undefined (do not wait). Can be overridden per-agent or per-tool.
97
+
98
+ **onTaskComplete** (`(task: BackgroundTask) => void | Promise<void>`): Global callback invoked when any background task completes successfully. Fires in addition to per-tool and per-agent callbacks.
99
+
100
+ **onTaskFailed** (`(task: BackgroundTask) => void | Promise<void>`): Global callback invoked when any background task fails. Fires in addition to per-tool and per-agent callbacks.
101
+
39
102
  ### deployer
40
103
 
41
104
  **Type:** `MastraDeployer`
@@ -90,6 +90,8 @@ await harness.sendMessage({ content: 'Hello!' })
90
90
 
91
91
  **subagents.stopWhen** (`LoopOptions['stopWhen']`): Optional stop condition for the spawned subagent.
92
92
 
93
+ **subagents.forked** (`boolean`): When `true`, calls to this subagent default to forked mode: the subagent runs on a clone of the parent thread, reusing the parent agent’s instructions, tools, and model so the prompt-cache prefix stays intact. Requires `memory` to be configured. The subagent definition’s own `instructions`, `tools`, `allowedHarnessTools`, `allowedWorkspaceTools`, `defaultModelId`, `maxSteps`, and `stopWhen` are ignored in forked mode. Callers can still override per-invocation via `forked: false` in the `subagent` tool input. See the [Forked subagents](#forked-subagents) section below for full semantics.
94
+
93
95
  **resolveModel** (`(modelId: string) => MastraLanguageModel`): Converts a model ID string (e.g., `"anthropic/claude-sonnet-4"`) to a language model instance. Used by subagents and observational memory model resolution.
94
96
 
95
97
  **omConfig** (`HarnessOMConfig`): Default configuration for observational memory (observer/reflector model IDs and thresholds).
@@ -286,16 +288,21 @@ await harness.switchThread({ threadId: 'thread-abc123' })
286
288
 
287
289
  #### `listThreads(options?)`
288
290
 
289
- List threads from storage. By default, only threads for the current resource are returned.
291
+ List threads from storage. By default, only threads for the current resource are returned, and transient [forked subagent](#forked-subagents) threads are hidden so they don’t appear in user-facing thread pickers / startup flows.
290
292
 
291
293
  ```typescript
292
- // List threads for current resource
294
+ // List threads for current resource (forks hidden)
293
295
  const threads = await harness.listThreads()
294
296
 
295
- // List all threads across resources
297
+ // List all threads across resources (forks still hidden)
296
298
  const allThreads = await harness.listThreads({ allResources: true })
299
+
300
+ // Include forked subagent threads (debug / admin tooling only)
301
+ const everything = await harness.listThreads({ includeForkedSubagents: true })
297
302
  ```
298
303
 
304
+ Fork threads are tagged with `metadata.forkedSubagent === true` (and `metadata.parentThreadId`) by the harness. Set `includeForkedSubagents: true` to opt back into seeing them — e.g. for a debug panel.
305
+
299
306
  #### `renameThread({ title })`
300
307
 
301
308
  Update the title of the current thread.
@@ -677,6 +684,42 @@ await harness.setSubagentModelId({ modelId: 'anthropic/claude-sonnet-4-6' })
677
684
  await harness.setSubagentModelId({ modelId: 'anthropic/claude-haiku-3.5', agentType: 'explore' })
678
685
  ```
679
686
 
687
+ ### Forked subagents
688
+
689
+ By default, a subagent runs with a fresh context — it doesn't see the parent conversation. **Forked subagents** opt into a different execution mode: the subagent runs on a clone of the parent thread and reuses the parent agent's full configuration. This is useful when the subagent needs the full context of the conversation so far (e.g., recalling earlier user-supplied facts), and when prompt-cache hit rates matter.
690
+
691
+ #### Enabling forked mode
692
+
693
+ Set `forked: true` either on the [`HarnessSubagent` definition](#configuration) (per-type default) or on each `subagent` tool call (per-invocation override):
694
+
695
+ ```typescript
696
+ // Per-type default — every call to this subagent forks unless overridden.
697
+ const subagents: HarnessSubagent[] = [
698
+ {
699
+ id: 'collaborator',
700
+ name: 'Collaborator',
701
+ description: 'Continues the conversation in a fork to try a different angle.',
702
+ instructions: '...',
703
+ forked: true,
704
+ },
705
+ ]
706
+ ```
707
+
708
+ The model can also pass `forked: true` (or `forked: false`) per-invocation in the `subagent` tool input; the per-invocation value wins.
709
+
710
+ #### Semantics and constraints
711
+
712
+ - **Memory required.** Forked mode calls `memory.cloneThread` to create the fork, so the harness must have `memory` configured and an active parent thread. Calls without those return a structured error rather than throwing.
713
+ - **Parent agent reused.** The fork runs through the parent agent's `stream(...)` call. The parent's instructions, tools, model, `maxSteps`, and `stopWhen` apply. The subagent definition's `instructions`, `tools`, `allowedHarnessTools`, `allowedWorkspaceTools`, `defaultModelId`, `maxSteps`, and `stopWhen` are ignored in forked mode — this is what preserves the prompt-cache prefix.
714
+ - **Toolsets inherited, recursive forks blocked at runtime.** Forks inherit the parent's toolsets verbatim (`ask_user`, `submit_plan`, user-configured harness tools, _including the `subagent` tool itself_) so the LLM request prefix — system prompt + tool list + tool schemas + tool descriptions — stays byte-identical to the parent's. This is what preserves the prompt cache. The `subagent` entry is kept on the model side but its `execute` is replaced inside the fork with a stub that returns a non-error "tool unavailable inside a forked subagent" message: nested forks are blocked at the runtime layer without perturbing the cached prefix.
715
+ - **Fork threads are tagged.** Each fork thread is created with `metadata.forkedSubagent === true` and `metadata.parentThreadId === <parent>`. By default, [`listThreads`](#listthreadsoptions) hides these so they don't show up in user-facing thread pickers / startup flows. Pass `includeForkedSubagents: true` to see them in admin / debug tooling.
716
+ - **Save-queue flushed before clone.** The agent stream batches message saves through a debounced `SaveQueueManager`, so the parent's latest user / assistant turn may not be on disk yet when the subagent tool call fires. The fork tool flushes pending saves first via the `flushMessages` callback on `AgentToolExecutionContext` before cloning, so the fork actually carries the latest turn. Flush failures are non-fatal — the clone still runs.
717
+ - **Parent thread untouched.** All subagent activity (messages, OM writes) lands on the fork. The parent thread is never appended to during a forked subagent run.
718
+
719
+ #### When to prefer non-forked mode
720
+
721
+ Forked mode trades isolation for context inheritance. If the subagent should run with a strictly smaller toolset, a different system prompt, or a cheaper model, use the default (non-forked) mode and pass any required context explicitly in the `task` description.
722
+
680
723
  ### Events
681
724
 
682
725
  #### `subscribe(listener)`
@@ -753,13 +796,13 @@ The harness emits events through registered listeners. The following table lists
753
796
 
754
797
  The harness provides built-in tools to agents in every mode:
755
798
 
756
- | Tool | Description |
757
- | ------------- | ------------------------------------------------------------------------------------------------------------------------- |
758
- | `ask_user` | Ask the user a question and wait for their response. Supports free text, single-select choices, and multi-select choices. |
759
- | `submit_plan` | Submit a plan for user review and approval. |
760
- | `task_write` | Create or update a structured task list for tracking progress. |
761
- | `task_check` | Check the completion status of the current task list. |
762
- | `subagent` | Spawn a focused subagent with constrained tools (only available when `subagents` is configured). |
799
+ | Tool | Description |
800
+ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
801
+ | `ask_user` | Ask the user a question and wait for their response. Supports free text, single-select choices, and multi-select choices. |
802
+ | `submit_plan` | Submit a plan for user review and approval. |
803
+ | `task_write` | Create or update a structured task list for tracking progress. |
804
+ | `task_check` | Check the completion status of the current task list. |
805
+ | `subagent` | Spawn a focused subagent with constrained tools (only available when `subagents` is configured). Pass `forked: true` to inherit the parent conversation — see [Forked subagents](#forked-subagents). |
763
806
 
764
807
  ### `ask_user` selections
765
808
 
@@ -169,6 +169,7 @@ The Reference section provides documentation of Mastra's API, including paramete
169
169
  - [PromptInjectionDetector](https://mastra.ai/reference/processors/prompt-injection-detector)
170
170
  - [SemanticRecall](https://mastra.ai/reference/processors/semantic-recall-processor)
171
171
  - [SkillSearchProcessor](https://mastra.ai/reference/processors/skill-search-processor)
172
+ - [StreamErrorRetryProcessor](https://mastra.ai/reference/processors/stream-error-retry-processor)
172
173
  - [SystemPromptScrubber](https://mastra.ai/reference/processors/system-prompt-scrubber)
173
174
  - [TokenLimiterProcessor](https://mastra.ai/reference/processors/token-limiter-processor)
174
175
  - [ToolCallFilter](https://mastra.ai/reference/processors/tool-call-filter)
@@ -193,6 +194,7 @@ The Reference section provides documentation of Mastra's API, including paramete
193
194
  - [registerApiRoute()](https://mastra.ai/reference/server/register-api-route)
194
195
  - [Server Routes](https://mastra.ai/reference/server/routes)
195
196
  - [Overview](https://mastra.ai/reference/storage/overview)
197
+ - [ClickHouse Storage](https://mastra.ai/reference/storage/clickhouse)
196
198
  - [Cloudflare D1 Storage](https://mastra.ai/reference/storage/cloudflare-d1)
197
199
  - [Cloudflare KV Storage](https://mastra.ai/reference/storage/cloudflare)
198
200
  - [Composite Storage](https://mastra.ai/reference/storage/composite)
@@ -209,6 +211,7 @@ The Reference section provides documentation of Mastra's API, including paramete
209
211
  - [MastraModelOutput](https://mastra.ai/reference/streaming/agents/MastraModelOutput)
210
212
  - [.stream()](https://mastra.ai/reference/streaming/agents/stream)
211
213
  - [.streamLegacy()](https://mastra.ai/reference/streaming/agents/streamLegacy)
214
+ - [.streamUntilIdle()](https://mastra.ai/reference/streaming/agents/streamUntilIdle)
212
215
  - [.observeStream()](https://mastra.ai/reference/streaming/workflows/observeStream)
213
216
  - [.resumeStream()](https://mastra.ai/reference/streaming/workflows/resumeStream)
214
217
  - [.stream()](https://mastra.ai/reference/streaming/workflows/stream)
@@ -8,14 +8,14 @@ For setup instructions, see the [Metrics overview](https://mastra.ai/docs/observ
8
8
 
9
9
  Metrics are extracted from spans when they end. The observability layer inspects each completed span, calculates duration, and (for model generation spans) reads token usage data. No manual instrumentation is needed.
10
10
 
11
- Metrics are routed through an internal event bus to the `DefaultExporter`, which batches and flushes them to storage. Only DuckDB and in-memory storage backends support metrics persistence today.
11
+ Metrics are routed through an internal event bus to the `DefaultExporter`, which batches and flushes them to storage.
12
12
 
13
13
  ### What affects whether a metric is available
14
14
 
15
15
  Two conditions must be true for a metric to reach storage:
16
16
 
17
17
  1. `DefaultExporter` is configured as an exporter.
18
- 2. The storage backend supports metrics (`DuckDB` or `InMemory`).
18
+ 2. The storage backend supports metrics (ClickHouse, DuckDB, or in-memory).
19
19
 
20
20
  If metrics aren't appearing, see [troubleshooting](#troubleshooting).
21
21
 
@@ -126,6 +126,21 @@ interface ObservabilityExporter {
126
126
  /** Initialize exporter with tracing configuration and/or access to Mastra */
127
127
  init?(options: InitExporterOptions): void
128
128
 
129
+ /** Handle tracing events */
130
+ onTracingEvent?(event: TracingEvent): void | Promise<void>
131
+
132
+ /** Handle log events */
133
+ onLogEvent?(event: LogEvent): void | Promise<void>
134
+
135
+ /** Handle metric events */
136
+ onMetricEvent?(event: MetricEvent): void | Promise<void>
137
+
138
+ /** Handle score events */
139
+ onScoreEvent?(event: ScoreEvent): void | Promise<void>
140
+
141
+ /** Handle feedback events */
142
+ onFeedbackEvent?(event: FeedbackEvent): void | Promise<void>
143
+
129
144
  /** Export tracing events */
130
145
  exportTracingEvent(event: TracingEvent): Promise<void>
131
146
 
@@ -154,6 +169,8 @@ interface ObservabilityExporter {
154
169
  }
155
170
  ```
156
171
 
172
+ Event callback payloads use observability event bus envelopes: `TracingEvent` carries span lifecycle events with `exportedSpan`, `LogEvent` wraps `ExportedLog` in `log`, `MetricEvent` wraps `ExportedMetric` in `metric`, `ScoreEvent` wraps `ExportedScore` in `score`, and `FeedbackEvent` wraps `ExportedFeedback` in `feedback`. For details on how the Cloud exporter handles these callbacks, see [CloudExporter](https://mastra.ai/reference/observability/tracing/exporters/cloud-exporter).
173
+
157
174
  ### `SpanOutputProcessor`
158
175
 
159
176
  Interface for span output processors.
@@ -0,0 +1,54 @@
1
+ # StreamErrorRetryProcessor
2
+
3
+ `StreamErrorRetryProcessor` is an **error processor** that retries transient errors emitted after an LLM stream starts. It includes built-in matching for OpenAI Responses stream errors and supports additional matchers for other provider-specific stream error shapes.
4
+
5
+ The processor isn't enabled by default in core. Add it to `errorProcessors` for agents that need stream-error retry handling.
6
+
7
+ ## Usage example
8
+
9
+ Add `StreamErrorRetryProcessor` to `errorProcessors`:
10
+
11
+ ```typescript
12
+ import { Agent } from '@mastra/core/agent'
13
+ import { StreamErrorRetryProcessor } from '@mastra/core/processors'
14
+
15
+ export const agent = new Agent({
16
+ name: 'openai-agent',
17
+ instructions: 'You are a helpful assistant.',
18
+ model: 'openai/gpt-5',
19
+ errorProcessors: [new StreamErrorRetryProcessor()],
20
+ })
21
+ ```
22
+
23
+ ## How it works
24
+
25
+ The processor checks the error and its cause chain for:
26
+
27
+ - Provider retry metadata: `isRetryable === true`
28
+ - Built-in OpenAI Responses stream error matching
29
+ - Matcher results: Any configured matcher that returns `true`
30
+
31
+ When the error is retryable, the processor returns `{ retry: true }`. It doesn't mutate messages.
32
+
33
+ ## Default OpenAI Responses matcher
34
+
35
+ `isRetryableOpenAIResponsesStreamError` matches OpenAI Responses stream error chunks with `type: 'error'` or `type: 'response.failed'`. It retries known transient OpenAI error codes and, as a fallback, errors with explicit retry guidance such as `You can retry your request`.
36
+
37
+ `StreamErrorRetryProcessor` includes this matcher by default. You can also import it and reuse it in custom retry logic.
38
+
39
+ ## Constructor parameters
40
+
41
+ **options** (`StreamErrorRetryProcessorOptions`): Configuration for retry handling.
42
+
43
+ ## Properties
44
+
45
+ **id** (`'stream-error-retry-processor'`): Processor identifier.
46
+
47
+ **name** (`'Stream Error Retry Processor'`): Processor display name.
48
+
49
+ **processAPIError** (`(args: ProcessAPIErrorArgs) => ProcessAPIErrorResult | void`): Retries stream errors up to the configured retry limit.
50
+
51
+ ## Related
52
+
53
+ - [Processor interface](https://mastra.ai/reference/processors/processor-interface)
54
+ - [Processors](https://mastra.ai/docs/agents/processors)