@mastra/mcp-docs-server 1.1.46-alpha.3 → 1.1.46-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agent-builder/browser.md +1 -1
- package/.docs/docs/agent-builder/channels.md +1 -1
- package/.docs/docs/agent-builder/integrations.md +73 -0
- package/.docs/docs/agent-builder/overview.md +1 -0
- package/.docs/docs/agents/adding-voice.md +1 -1
- package/.docs/docs/agents/agent-approval.md +2 -2
- package/.docs/docs/agents/background-tasks.md +1 -1
- package/.docs/docs/agents/channels.md +2 -2
- package/.docs/docs/agents/code-mode.md +20 -56
- package/.docs/docs/agents/overview.md +1 -0
- package/.docs/docs/agents/sdk-agents.md +165 -1
- package/.docs/docs/agents/supervisor-agents.md +40 -2
- package/.docs/docs/agents/using-tools.md +59 -1
- package/.docs/docs/browser/agent-browser.md +1 -1
- package/.docs/docs/browser/browser-viewer.md +22 -15
- package/.docs/docs/browser/overview.md +1 -1
- package/.docs/docs/browser/stagehand.md +1 -1
- package/.docs/docs/editor/overview.md +6 -6
- package/.docs/docs/editor/prompts.md +3 -3
- package/.docs/docs/editor/tools.md +2 -2
- package/.docs/docs/evals/evals-with-memory.md +8 -8
- package/.docs/docs/mastra-platform/observability.md +1 -1
- package/.docs/docs/mastra-platform/server.md +1 -1
- package/.docs/docs/mcp/mcp-apps.md +4 -4
- package/.docs/docs/memory/observational-memory.md +1 -1
- package/.docs/docs/memory/working-memory.md +2 -2
- package/.docs/docs/observability/integrations/bridges/datadog.md +1 -1
- package/.docs/docs/observability/integrations/bridges/otel.md +1 -1
- package/.docs/docs/observability/integrations/exporters/laminar.md +1 -1
- package/.docs/docs/observability/integrations/exporters/langfuse.md +26 -1
- package/.docs/docs/observability/integrations/exporters/mastra-platform.md +1 -1
- package/.docs/docs/observability/integrations/exporters/mastra-storage.md +4 -4
- package/.docs/docs/observability/integrations/exporters/otel.md +1 -1
- package/.docs/docs/observability/integrations/overview.md +1 -1
- package/.docs/docs/observability/logging.md +1 -1
- package/.docs/docs/observability/metrics/overview.md +3 -3
- package/.docs/docs/observability/metrics/querying.md +2 -2
- package/.docs/docs/observability/storage.md +2 -2
- package/.docs/docs/observability/tracing/overview.md +1 -1
- package/.docs/docs/server/auth/fga.md +15 -15
- package/.docs/docs/server/auth/okta.md +2 -2
- package/.docs/docs/server/auth/workos.md +1 -1
- package/.docs/docs/server/custom-api-routes.md +1 -1
- package/.docs/docs/server/pubsub.md +4 -4
- package/.docs/docs/studio/auth.md +1 -1
- package/.docs/docs/studio/observability.md +3 -1
- package/.docs/docs/workflows/scheduled-workflows.md +13 -13
- package/.docs/docs/workspace/filesystem.md +1 -1
- package/.docs/docs/workspace/lsp.md +1 -1
- package/.docs/docs/workspace/overview.md +35 -1
- package/.docs/docs/workspace/sandbox.md +4 -3
- package/.docs/guides/build-your-ui/ai-sdk-ui.md +2 -2
- package/.docs/guides/deployment/aws-bedrock-agentcore.md +3 -3
- package/.docs/guides/deployment/inngest.md +5 -5
- package/.docs/guides/deployment/temporal.md +3 -3
- package/.docs/guides/getting-started/nestjs.md +1 -1
- package/.docs/guides/migrations/mastra-cloud.md +3 -3
- package/.docs/guides/migrations/upgrade-to-v1/overview.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/tracing.md +1 -1
- package/.docs/reference/acp/acp-agent.md +2 -2
- package/.docs/reference/agents/agent.md +44 -0
- package/.docs/reference/agents/channels.md +1 -1
- package/.docs/reference/agents/durable-agent.md +2 -2
- package/.docs/reference/agents/generate.md +2 -0
- package/.docs/reference/agents/generateLegacy.md +2 -0
- package/.docs/reference/ai-sdk/handle-chat-stream.md +1 -1
- package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +1 -1
- package/.docs/reference/auth/okta.md +1 -1
- package/.docs/reference/auth/workos.md +1 -1
- package/.docs/reference/browser/agent-browser.md +1 -1
- package/.docs/reference/browser/browser-viewer.md +11 -9
- package/.docs/reference/browser/stagehand-browser.md +1 -1
- package/.docs/reference/cli/mastra.md +3 -1
- package/.docs/reference/client-js/responses.md +2 -2
- package/.docs/reference/client-js/workflows.md +1 -1
- package/.docs/reference/configuration.md +1 -1
- package/.docs/reference/core/removeWorkspace.md +26 -0
- package/.docs/reference/editor/browser-provider.md +1 -1
- package/.docs/reference/editor/storage-browser-ref.md +3 -3
- package/.docs/reference/editor/storage-workspace-ref.md +1 -1
- package/.docs/reference/evals/rubric.md +113 -0
- package/.docs/reference/evals/trajectory-accuracy.md +3 -3
- package/.docs/reference/harness/harness-class.md +81 -62
- package/.docs/reference/index.md +5 -0
- package/.docs/reference/memory/serialized-memory-config.md +1 -1
- package/.docs/reference/observability/metrics/automatic-metrics.md +3 -3
- package/.docs/reference/observability/tracing/bridges/datadog.md +1 -1
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +3 -3
- package/.docs/reference/observability/tracing/exporters/default-exporter.md +1 -1
- package/.docs/reference/observability/tracing/exporters/mastra-platform-exporter.md +5 -5
- package/.docs/reference/observability/tracing/exporters/mastra-storage-exporter.md +1 -1
- package/.docs/reference/observability/tracing/exporters/otel.md +1 -1
- package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +2 -2
- package/.docs/reference/processors/cost-guard-processor.md +2 -2
- package/.docs/reference/processors/processor-interface.md +4 -4
- package/.docs/reference/processors/response-cache.md +2 -2
- package/.docs/reference/processors/skill-search-processor.md +3 -3
- package/.docs/reference/processors/tool-search-processor.md +108 -4
- package/.docs/reference/pubsub/base.md +1 -1
- package/.docs/reference/pubsub/caching-pubsub.md +2 -2
- package/.docs/reference/pubsub/event-emitter.md +3 -3
- package/.docs/reference/pubsub/google-cloud-pubsub.md +1 -1
- package/.docs/reference/pubsub/redis-streams.md +1 -1
- package/.docs/reference/pubsub/unix-socket-pubsub.md +1 -1
- package/.docs/reference/server/nestjs-adapter.md +2 -2
- package/.docs/reference/signals/task-signal-provider.md +62 -0
- package/.docs/reference/storage/clickhouse.md +49 -4
- package/.docs/reference/storage/composite.md +33 -1
- package/.docs/reference/storage/convex.md +2 -2
- package/.docs/reference/storage/dsql.md +7 -7
- package/.docs/reference/storage/duckdb.md +3 -3
- package/.docs/reference/storage/redis.md +3 -3
- package/.docs/reference/storage/spanner.md +7 -7
- package/.docs/reference/streaming/agents/stream.md +2 -0
- package/.docs/reference/streaming/agents/streamLegacy.md +2 -0
- package/.docs/reference/streaming/agents/streamUntilIdle.md +1 -1
- package/.docs/reference/tools/brightdata.md +3 -3
- package/.docs/reference/tools/create-code-mode.md +124 -0
- package/.docs/reference/tools/create-tool.md +1 -1
- package/.docs/reference/tools/mcp-client.md +5 -5
- package/.docs/reference/tools/mcp-server.md +45 -0
- package/.docs/reference/tools/perplexity.md +4 -4
- package/.docs/reference/tools/tavily.md +3 -3
- package/.docs/reference/voice/aws-nova-sonic.md +1 -1
- package/.docs/reference/voice/google-gemini-live.md +1 -1
- package/.docs/reference/voice/inworld-realtime.md +5 -5
- package/.docs/reference/voice/inworld.md +1 -1
- package/.docs/reference/voice/sarvam.md +1 -1
- package/.docs/reference/workspace/agentcore-runtime-sandbox.md +7 -7
- package/.docs/reference/workspace/azure-blob-filesystem.md +2 -2
- package/.docs/reference/workspace/files-sdk-filesystem.md +3 -3
- package/.docs/reference/workspace/google-drive-filesystem.md +7 -7
- package/.docs/reference/workspace/modal-sandbox.md +1 -1
- package/.docs/reference/workspace/railway-sandbox.md +230 -0
- package/.docs/reference/workspace/vercel-microvm-sandbox.md +1 -1
- package/.docs/reference/workspace/vercel.md +2 -2
- package/.docs/reference/workspace/workspace-class.md +39 -3
- package/CHANGELOG.md +10 -0
- package/dist/chunk-GLPCVXXO.js +2075 -0
- package/dist/chunk-GLPCVXXO.js.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/dist/stdio.js +1 -2070
- package/dist/stdio.js.map +1 -1
- package/package.json +5 -5
|
@@ -19,7 +19,7 @@ When the source agent stream includes a final structured output object, `toAISdk
|
|
|
19
19
|
}
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
The `object` field contains your full structured output value. This maps Mastra's final structured output chunk into the AI SDK UI stream. Partial structured output chunks
|
|
22
|
+
The `object` field contains your full structured output value. This maps Mastra's final structured output chunk into the AI SDK UI stream. Partial structured output chunks aren't emitted.
|
|
23
23
|
|
|
24
24
|
## Usage example
|
|
25
25
|
|
|
@@ -54,7 +54,7 @@ export const mastra = new Mastra({
|
|
|
54
54
|
|
|
55
55
|
### Environment variables
|
|
56
56
|
|
|
57
|
-
The following environment variables are automatically used when constructor options
|
|
57
|
+
The following environment variables are automatically used when constructor options aren't provided:
|
|
58
58
|
|
|
59
59
|
**OKTA\_DOMAIN** (`string`): Your Okta domain (e.g., \`dev-123456.okta.com\`). Found in your Okta admin console.
|
|
60
60
|
|
|
@@ -81,7 +81,7 @@ const auth = new MastraAuthWorkos({
|
|
|
81
81
|
})
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
When `trustJwtClaims` is enabled, Mastra can authenticate verified bearer tokens for service principals even if `getUser()`
|
|
84
|
+
When `trustJwtClaims` is enabled, Mastra can authenticate verified bearer tokens for service principals even if `getUser()` isn't the right lookup path. This is the preferred way to pass pre-resolved `organizationMembershipId` values into FGA checks for machine-to-machine flows.
|
|
85
85
|
|
|
86
86
|
## Custom authorization
|
|
87
87
|
|
|
@@ -363,7 +363,7 @@ Capture a screenshot of the current page as PNG (viewport by default; set `fullP
|
|
|
363
363
|
|
|
364
364
|
| Parameter | Type | Description |
|
|
365
365
|
| ---------- | --------- | ---------------------------------------------------------------------------------------- |
|
|
366
|
-
| `fullPage` | `boolean` | Capture the full scrollable page instead of
|
|
366
|
+
| `fullPage` | `boolean` | Capture the full scrollable page instead of only the viewport (optional, default: false) |
|
|
367
367
|
|
|
368
368
|
### `browser_close`
|
|
369
369
|
|
|
@@ -8,6 +8,7 @@ Use `BrowserViewer` when your agent drives a browser through a CLI tool like `br
|
|
|
8
8
|
|
|
9
9
|
```typescript
|
|
10
10
|
import { Workspace, LocalSandbox } from '@mastra/core/workspace'
|
|
11
|
+
import { Memory } from '@mastra/memory'
|
|
11
12
|
import { BrowserViewer } from '@mastra/browser-viewer'
|
|
12
13
|
import { Agent } from '@mastra/core/agent'
|
|
13
14
|
|
|
@@ -26,6 +27,7 @@ const browserAgent = new Agent({
|
|
|
26
27
|
model: 'openai/gpt-5.5',
|
|
27
28
|
workspace,
|
|
28
29
|
instructions: 'You are a web automation assistant.',
|
|
30
|
+
memory: new Memory(),
|
|
29
31
|
})
|
|
30
32
|
```
|
|
31
33
|
|
|
@@ -42,7 +44,7 @@ When `cdpUrl` is provided, `BrowserViewer` connects to the existing browser inst
|
|
|
42
44
|
|
|
43
45
|
## Constructor parameters
|
|
44
46
|
|
|
45
|
-
**cli** (`'agent-browser' | 'browser-use' | 'browse-cli'`): Which CLI the agent uses for browser automation. The CLI connects to Chrome via the CDP URL.
|
|
47
|
+
**cli** (`'agent-browser' | 'browser-use' | 'browse' | 'browse-cli'`): Which CLI the agent uses for browser automation. The CLI connects to Chrome via the CDP URL.
|
|
46
48
|
|
|
47
49
|
**headless** (`boolean`): Whether to run Chrome in headless mode. (Default: `true`)
|
|
48
50
|
|
|
@@ -72,7 +74,7 @@ When `cdpUrl` is provided, `BrowserViewer` connects to the existing browser inst
|
|
|
72
74
|
|
|
73
75
|
**providerType** (`'cli'`): Always 'cli'. Distinguishes BrowserViewer from SDK-based providers like AgentBrowser.
|
|
74
76
|
|
|
75
|
-
**cli** (`'agent-browser' | 'browser-use' | 'browse-cli'`): The CLI provider this instance is configured for.
|
|
77
|
+
**cli** (`'agent-browser' | 'browser-use' | 'browse' | 'browse-cli'`): The CLI provider this instance is configured for.
|
|
76
78
|
|
|
77
79
|
**status** (`BrowserStatus`): Current browser status: 'pending', 'launching', 'ready', 'error', 'closing', or 'closed'.
|
|
78
80
|
|
|
@@ -192,9 +194,9 @@ await viewer.injectKeyboardEvent({
|
|
|
192
194
|
|
|
193
195
|
Each CLI must be installed separately. Each also publishes a [skill](https://mastra.ai/docs/workspace/skills) that teaches the agent its commands and workflows. When a CLI command runs through `workspace_execute_command`, Mastra detects it and injects the CDP URL automatically using the correct flag.
|
|
194
196
|
|
|
195
|
-
Unlike SDK providers, `BrowserViewer`
|
|
197
|
+
Unlike SDK providers, `BrowserViewer` doesn't provide agent tools. The agent uses CLI commands through `workspace_execute_command` instead.
|
|
196
198
|
|
|
197
|
-
### [agent-browser](https://www.npmjs.com/package/agent-browser)
|
|
199
|
+
### [`agent-browser`](https://www.npmjs.com/package/agent-browser)
|
|
198
200
|
|
|
199
201
|
Config value: `'agent-browser'` · CDP flag: `--cdp`
|
|
200
202
|
|
|
@@ -203,7 +205,7 @@ npm install -g agent-browser
|
|
|
203
205
|
npx skills add vercel-labs/agent-browser
|
|
204
206
|
```
|
|
205
207
|
|
|
206
|
-
### [browser-use](https://pypi.org/project/browser-use/)
|
|
208
|
+
### [`browser-use`](https://pypi.org/project/browser-use/)
|
|
207
209
|
|
|
208
210
|
Config value: `'browser-use'` · CDP flag: `--cdp-url`
|
|
209
211
|
|
|
@@ -212,13 +214,13 @@ pip install browser-use
|
|
|
212
214
|
npx skills add browser-use/browser-use --skill browser-use
|
|
213
215
|
```
|
|
214
216
|
|
|
215
|
-
### [browse
|
|
217
|
+
### [`browse`](https://www.npmjs.com/package/browse) (command: `browse`)
|
|
216
218
|
|
|
217
|
-
Config value: `'browse
|
|
219
|
+
Config value: `'browse'` · CDP flag: `--ws`
|
|
218
220
|
|
|
219
221
|
```bash
|
|
220
|
-
npm install -g
|
|
221
|
-
|
|
222
|
+
npm install -g browse
|
|
223
|
+
browse skills install
|
|
222
224
|
```
|
|
223
225
|
|
|
224
226
|
## Related
|
|
@@ -248,7 +248,7 @@ Capture a screenshot of the current page as PNG (viewport by default; set `fullP
|
|
|
248
248
|
|
|
249
249
|
| Parameter | Type | Description |
|
|
250
250
|
| ---------- | --------- | ---------------------------------------------------------------------------------------- |
|
|
251
|
-
| `fullPage` | `boolean` | Capture the full scrollable page instead of
|
|
251
|
+
| `fullPage` | `boolean` | Capture the full scrollable page instead of only the viewport (optional, default: false) |
|
|
252
252
|
|
|
253
253
|
### `stagehand_close`
|
|
254
254
|
|
|
@@ -1222,7 +1222,9 @@ Prints help message and exits.
|
|
|
1222
1222
|
|
|
1223
1223
|
By default, Mastra collects anonymous information about your project like your OS, Mastra version or Node.js version. You can read the [source code](https://github.com/mastra-ai/mastra/blob/main/packages/cli/src/analytics/index.ts) to check what's collected.
|
|
1224
1224
|
|
|
1225
|
-
|
|
1225
|
+
When a server started with `mastra dev` or `mastra start` has observability metrics enabled, Mastra also sends anonymous, aggregated model usage at startup: input and output token counts per provider and model, plus the command (`dev` or `start`) and `NODE_ENV`. No prompts, responses, or other message content is ever sent. You can read the [source code](https://github.com/mastra-ai/mastra/blob/main/packages/core/src/telemetry/usage-telemetry.ts) to check what's collected.
|
|
1226
|
+
|
|
1227
|
+
You can opt out of all CLI and usage analytics by setting an environment variable:
|
|
1226
1228
|
|
|
1227
1229
|
```bash
|
|
1228
1230
|
MASTRA_TELEMETRY_DISABLED=1
|
|
@@ -130,7 +130,7 @@ Use [`client.conversations`](https://mastra.ai/reference/client-js/conversations
|
|
|
130
130
|
|
|
131
131
|
`response.tools` contains the configured function definitions available for the request.
|
|
132
132
|
|
|
133
|
-
If the model calls a function, that activity
|
|
133
|
+
If the model calls a function, that activity is included in `response.output` as `function_call` and `function_call_output` items alongside the final assistant `message`.
|
|
134
134
|
|
|
135
135
|
When `stream: true`, function calls are also emitted as Responses stream events. Read `response.function_call_arguments.delta` events for partial argument chunks and prefer `response.function_call_arguments.done` for the finalized arguments payload and tool name. Read `response.output_item.done` events for completed `function_call` and `function_call_output` items. Tool output items use `<toolCallId>:output` IDs.
|
|
136
136
|
|
|
@@ -167,7 +167,7 @@ const response = await client.responses.create({
|
|
|
167
167
|
|
|
168
168
|
## Provider-backed requests
|
|
169
169
|
|
|
170
|
-
Use `providerOptions` when you need provider-specific options that Mastra
|
|
170
|
+
Use `providerOptions` when you need provider-specific options that Mastra doesn't normalize at the Responses layer.
|
|
171
171
|
|
|
172
172
|
```typescript
|
|
173
173
|
const response = await client.responses.create({
|
|
@@ -243,7 +243,7 @@ await mastraClient.pauseSchedule('daily-report')
|
|
|
243
243
|
|
|
244
244
|
### `resumeSchedule()`
|
|
245
245
|
|
|
246
|
-
Resume a paused schedule. The next fire time is recomputed from now, so a long-paused schedule
|
|
246
|
+
Resume a paused schedule. The next fire time is recomputed from now, so a long-paused schedule doesn't fire a backlog. Returns the updated schedule. Idempotent.
|
|
247
247
|
|
|
248
248
|
```typescript
|
|
249
249
|
await mastraClient.resumeSchedule('daily-report')
|
|
@@ -637,7 +637,7 @@ export const mastra = new Mastra({
|
|
|
637
637
|
**Type:** `object`\
|
|
638
638
|
**Default:** `undefined`
|
|
639
639
|
|
|
640
|
-
MCP transport options applied to all MCP HTTP and SSE routes. Use this to enable stateless mode for serverless environments (Cloudflare Workers, Vercel Edge, AWS Lambda, etc.) where persistent connections and in-memory session state
|
|
640
|
+
MCP transport options applied to all MCP HTTP and SSE routes. Use this to enable stateless mode for serverless environments (Cloudflare Workers, Vercel Edge, AWS Lambda, etc.) where persistent connections and in-memory session state aren't available.
|
|
641
641
|
|
|
642
642
|
| Property | Type | Default | Description |
|
|
643
643
|
| -------------------- | -------------- | ----------- | ---------------------------------------------------- |
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Mastra.removeWorkspace()
|
|
2
|
+
|
|
3
|
+
The `.removeWorkspace()` method removes a workspace from the Mastra runtime registry. Pass `{ destroy: true }` to destroy the workspace before it's removed.
|
|
4
|
+
|
|
5
|
+
## Usage example
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
await mastra.removeWorkspace('workspace-123', { destroy: true })
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Parameters
|
|
12
|
+
|
|
13
|
+
**id** (`string`): The ID of the workspace to remove.
|
|
14
|
+
|
|
15
|
+
**options** (`{ destroy?: boolean }`): Optional cleanup behavior. Set \`destroy\` to \`true\` to call \`workspace.destroy()\` before removing the workspace.
|
|
16
|
+
|
|
17
|
+
## Returns
|
|
18
|
+
|
|
19
|
+
**removed** (`Promise<boolean>`): \`true\` when a workspace was removed. \`false\` when no workspace exists for the ID.
|
|
20
|
+
|
|
21
|
+
When `destroy` is `true` and `workspace.destroy()` throws, the call rejects with that error and the workspace remains registered so the caller can retry cleanup.
|
|
22
|
+
|
|
23
|
+
## Related
|
|
24
|
+
|
|
25
|
+
- [Workspace overview](https://mastra.ai/docs/workspace/overview)
|
|
26
|
+
- [Workspace class](https://mastra.ai/reference/workspace/workspace-class)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
`BrowserProvider` is the interface a package implements to register a browser with [`MastraEditor`](https://mastra.ai/reference/editor/mastra-editor). The editor calls `createBrowser(config)` at agent hydration time, using the `provider` id from the stored [`StorageBrowserRef`](https://mastra.ai/reference/editor/storage-browser-ref) as the lookup key.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
No built-in browser providers are available. To use the `browser` feature in the Agent Builder, register a provider package (for example, `@mastra/stagehand`).
|
|
6
6
|
|
|
7
7
|
## Usage example
|
|
8
8
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
`StorageBrowserRef` is the inline browser configuration attached to a stored agent. The `provider` id is resolved at hydration time against the [`BrowserProvider`](https://mastra.ai/reference/editor/browser-provider) registered on [`MastraEditor.browsers`](https://mastra.ai/reference/editor/mastra-editor).
|
|
4
4
|
|
|
5
|
-
It
|
|
5
|
+
It's the type used by [`BuilderAgentDefaults.browser`](https://mastra.ai/reference/editor/agent-builder/builder-agent-defaults) and by stored agent records.
|
|
6
6
|
|
|
7
7
|
## Usage example
|
|
8
8
|
|
|
@@ -36,7 +36,7 @@ new MastraEditor({
|
|
|
36
36
|
type StorageBrowserRef = { type: 'inline'; config: StorageBrowserConfig }
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
A `{ type: 'id' }` variant for browsers isn't available — they're always inlined.
|
|
40
40
|
|
|
41
41
|
## Properties
|
|
42
42
|
|
|
@@ -70,7 +70,7 @@ The shape embedded under `config`. Defined in `@mastra/core/storage`.
|
|
|
70
70
|
|
|
71
71
|
## Hydration
|
|
72
72
|
|
|
73
|
-
`StorageBrowserRef` is resolved lazily on `mastra.editor.agent.getById()`. The editor looks up `config.provider` on `MastraEditor.browsers` and calls `provider.createBrowser(config)`. If the provider
|
|
73
|
+
`StorageBrowserRef` is resolved lazily on `mastra.editor.agent.getById()`. The editor looks up `config.provider` on `MastraEditor.browsers` and calls `provider.createBrowser(config)`. If the provider isn't registered, the editor logs a warning and returns `undefined` — the agent still loads, but without a browser.
|
|
74
74
|
|
|
75
75
|
## Related
|
|
76
76
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
`StorageWorkspaceRef` is the discriminated union used to attach a workspace to a stored agent. It either points at a workspace registered on the Mastra runtime by ID, or embeds a workspace snapshot inline.
|
|
4
4
|
|
|
5
|
-
It
|
|
5
|
+
It's the type used by [`BuilderAgentDefaults.workspace`](https://mastra.ai/reference/editor/agent-builder/builder-agent-defaults) and by stored agent records.
|
|
6
6
|
|
|
7
7
|
## Usage example
|
|
8
8
|
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Rubric scorer
|
|
2
|
+
|
|
3
|
+
**Added in:** `@mastra/evals@1.3.0`
|
|
4
|
+
|
|
5
|
+
The `createRubricScorer()` function creates an LLM-as-judge scorer that grades an agent's output against a rubric (a checklist of criteria). It returns a **binary** score: `1` only when every required criterion is satisfied, otherwise `0`. The `reason` lists each criterion's verdict so the agent knows exactly what to fix.
|
|
6
|
+
|
|
7
|
+
This scorer is designed to drop into [`isTaskComplete`](https://mastra.ai/reference/streaming/agents/stream). Because `isTaskComplete` treats `score === 1` as "task complete" and injects the `reason` back into the conversation as feedback, the agent keeps iterating until the rubric is satisfied (or `maxSteps` is reached).
|
|
8
|
+
|
|
9
|
+
## Parameters
|
|
10
|
+
|
|
11
|
+
**model** (`MastraModelConfig`): The language model used to grade the output against the rubric. A smaller, cheaper model is usually sufficient for grading.
|
|
12
|
+
|
|
13
|
+
**criteria** (`RubricCriterion[] | string`): The rubric to grade against. A string is treated as a newline-delimited checklist (each line becomes a required criterion). If omitted, the rubric is read at run time from a \`rubric\` value on the request context or additional context; if none resolves, the scorer is a no-op and returns 1.
|
|
14
|
+
|
|
15
|
+
**options** (`RubricScorerOptions`): Configuration options for the scorer
|
|
16
|
+
|
|
17
|
+
## `.run()` returns
|
|
18
|
+
|
|
19
|
+
**score** (`number`): 1 when every required criterion is satisfied, otherwise 0. The result is multiplied by scale.
|
|
20
|
+
|
|
21
|
+
**reason** (`string`): A per-criterion explanation listing which criteria are met or unmet and why. This is the text that \`isTaskComplete\` injects back into the conversation as feedback.
|
|
22
|
+
|
|
23
|
+
## Usage with isTaskComplete
|
|
24
|
+
|
|
25
|
+
Define the rubric once, attach the scorer to `isTaskComplete`, and the agent self-corrects until the rubric is satisfied:
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
import { Agent } from '@mastra/core/agent'
|
|
29
|
+
import { createRubricScorer } from '@mastra/evals/scorers/prebuilt'
|
|
30
|
+
|
|
31
|
+
const supervisor = new Agent({
|
|
32
|
+
id: 'supervisor',
|
|
33
|
+
instructions: `You coordinate research and writing using specialized agents. Delegate to research-agent for facts, then writing-agent for content.`,
|
|
34
|
+
model: 'openai/gpt-5.5',
|
|
35
|
+
agents: { researchAgent, writingAgent },
|
|
36
|
+
})
|
|
37
|
+
|
|
38
|
+
const rubricScorer = createRubricScorer({
|
|
39
|
+
model: 'openai/gpt-5-mini',
|
|
40
|
+
criteria: [
|
|
41
|
+
{ description: 'The response includes an analysis section' },
|
|
42
|
+
{ description: 'The response includes concrete recommendations' },
|
|
43
|
+
],
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
const stream = await supervisor.stream('Research AI in education', {
|
|
47
|
+
maxSteps: 10,
|
|
48
|
+
isTaskComplete: {
|
|
49
|
+
scorers: [rubricScorer],
|
|
50
|
+
strategy: 'all',
|
|
51
|
+
},
|
|
52
|
+
})
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## String rubric
|
|
56
|
+
|
|
57
|
+
A newline-delimited string is parsed into criteria, with common list markers (`-`, `*`, `1.`) stripped. Every line becomes a required criterion:
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
const rubricScorer = createRubricScorer({
|
|
61
|
+
model: 'openai/gpt-5-mini',
|
|
62
|
+
criteria: `- All tests pass in the test suite
|
|
63
|
+
- The function is named find_duplicates and accepts a single list argument`,
|
|
64
|
+
})
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Optional criteria
|
|
68
|
+
|
|
69
|
+
Mark a criterion as optional to have it graded and reported without gating completion:
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
const rubricScorer = createRubricScorer({
|
|
73
|
+
model: 'openai/gpt-5-mini',
|
|
74
|
+
criteria: [
|
|
75
|
+
{ description: 'Includes an analysis section', required: true },
|
|
76
|
+
{ description: 'Includes citations', required: false },
|
|
77
|
+
],
|
|
78
|
+
})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Dynamic rubric per run
|
|
82
|
+
|
|
83
|
+
When no `criteria` is passed to the factory, the scorer resolves a `rubric` value from the run's request context, additional context, or input. This lets a single scorer instance grade different rubrics per run without rebuilding it:
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
const rubricScorer = createRubricScorer({
|
|
87
|
+
model: 'openai/gpt-5-mini',
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
await supervisor.stream('Write find_duplicates', {
|
|
91
|
+
isTaskComplete: { scorers: [rubricScorer] },
|
|
92
|
+
requestContext: {
|
|
93
|
+
rubric: '- All tests pass\n- The function is named find_duplicates',
|
|
94
|
+
},
|
|
95
|
+
})
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
If no rubric resolves, the scorer returns `1` and doesn't gate the loop.
|
|
99
|
+
|
|
100
|
+
## Scoring details
|
|
101
|
+
|
|
102
|
+
The scorer runs in two phases:
|
|
103
|
+
|
|
104
|
+
1. **Grade**: The judge model evaluates each criterion independently and returns a per-criterion verdict (`satisfied` / not) with reasoning.
|
|
105
|
+
2. **Score**: The result is `1` only when every required criterion is `satisfied`, otherwise `0`. If no criteria are marked required, all criteria are treated as required.
|
|
106
|
+
|
|
107
|
+
The `reason` summarizes the overall result and lists each criterion with its verdict, so a failing grade gives the agent targeted, actionable feedback rather than a generic "try again".
|
|
108
|
+
|
|
109
|
+
## Related
|
|
110
|
+
|
|
111
|
+
- [isTaskComplete on stream()](https://mastra.ai/reference/streaming/agents/stream)
|
|
112
|
+
- [Supervisor agents](https://mastra.ai/docs/agents/supervisor-agents)
|
|
113
|
+
- [createScorer](https://mastra.ai/reference/evals/create-scorer)
|
|
@@ -31,7 +31,7 @@ workflow_run
|
|
|
31
31
|
|
|
32
32
|
### Fallback extraction
|
|
33
33
|
|
|
34
|
-
When storage
|
|
34
|
+
When storage isn't available, the pipeline falls back to:
|
|
35
35
|
|
|
36
36
|
- **Agents:** `extractTrajectory()` — Extracts `ToolCallStep` entries from `toolInvocations` in the agent's message output. Produces a flat list of tool calls.
|
|
37
37
|
- **Workflows:** `extractWorkflowTrajectory()` — Extracts `WorkflowStepStep` entries from `stepResults`. Produces a flat list of workflow steps.
|
|
@@ -176,7 +176,7 @@ In this example, the parent workflow requires strict ordering of its steps, but
|
|
|
176
176
|
### Use the LLM-based scorer when:
|
|
177
177
|
|
|
178
178
|
- You need **semantic understanding** of whether steps were appropriate
|
|
179
|
-
- The optimal trajectory
|
|
179
|
+
- The optimal trajectory **isn't predetermined** (evaluate based on task requirements)
|
|
180
180
|
- You want to detect **unnecessary, redundant, or missing** steps
|
|
181
181
|
- You need **explanations** for scoring decisions
|
|
182
182
|
- You are evaluating **production agent behavior**
|
|
@@ -360,7 +360,7 @@ console.log(result.scores.trajectory['trajectory-accuracy'])
|
|
|
360
360
|
|
|
361
361
|
### Comparing step data
|
|
362
362
|
|
|
363
|
-
Validates
|
|
363
|
+
Validates the step names and step-specific data. For tool calls, this compares `toolArgs` and `toolResult`. For workflow steps, this compares `output`.
|
|
364
364
|
|
|
365
365
|
```typescript
|
|
366
366
|
const scorer = createTrajectoryAccuracyScorerCode({
|
|
@@ -166,10 +166,9 @@ const displayState = harness.getDisplayState()
|
|
|
166
166
|
|
|
167
167
|
Restore the task portion of `HarnessDisplayState` after a UI replays persisted task tool history. This emits `display_state_changed` without emitting a live `task_updated` event.
|
|
168
168
|
|
|
169
|
-
|
|
169
|
+
The task list itself is held in the thread-scoped `tasks` storage domain (the task store), not in Harness state, and is projected onto the agent state-signal lane (`stateId: "tasks"`), so this method only updates the display snapshot. Task tools read and write the task store directly; you no longer need to seed `setState({ tasks })`.
|
|
170
170
|
|
|
171
171
|
```typescript
|
|
172
|
-
await harness.setState({ tasks: replayedTasks })
|
|
173
172
|
harness.restoreDisplayTasks(replayedTasks)
|
|
174
173
|
```
|
|
175
174
|
|
|
@@ -476,31 +475,50 @@ harness.respondToToolApproval({ decision: 'approve' })
|
|
|
476
475
|
harness.respondToToolApproval({ decision: 'decline' })
|
|
477
476
|
```
|
|
478
477
|
|
|
479
|
-
###
|
|
478
|
+
### Tool suspensions and plans
|
|
480
479
|
|
|
481
|
-
#### `
|
|
480
|
+
#### `respondToToolSuspension({ resumeData, toolCallId?, requestContext? })`
|
|
482
481
|
|
|
483
|
-
Respond to a pending
|
|
482
|
+
Respond to a pending tool suspension. Interactive built-in tools such as `ask_user` and `request_access` pause through the native tool-suspension primitive, which emits a `tool_suspended` event carrying `toolCallId`, `toolName`, and `suspendPayload`. Pass `resumeData` to resume the suspended tool with the user's response.
|
|
483
|
+
|
|
484
|
+
Provide `toolCallId` to select which suspension to resume. It is required when more than one tool is suspended at the same time (for example, parallel `ask_user` calls). When omitted, it resolves to the sole pending suspension.
|
|
484
485
|
|
|
485
486
|
```typescript
|
|
486
|
-
harness.
|
|
487
|
+
harness.subscribe(event => {
|
|
488
|
+
if (event.type === 'tool_suspended' && event.toolName === 'ask_user') {
|
|
489
|
+
const { question } = event.suspendPayload as { question: string }
|
|
490
|
+
// Show `question` to the user, then resume the tool with their answer.
|
|
491
|
+
harness.respondToToolSuspension({
|
|
492
|
+
toolCallId: event.toolCallId,
|
|
493
|
+
resumeData: 'Yes, proceed with the refactor',
|
|
494
|
+
})
|
|
495
|
+
}
|
|
496
|
+
})
|
|
487
497
|
```
|
|
488
498
|
|
|
489
499
|
For multi-select questions, pass the selected option labels as a string array.
|
|
490
500
|
|
|
491
501
|
```typescript
|
|
492
|
-
harness.
|
|
502
|
+
harness.respondToToolSuspension({
|
|
503
|
+
toolCallId: event.toolCallId,
|
|
504
|
+
resumeData: ['Add tests', 'Update docs'],
|
|
505
|
+
})
|
|
493
506
|
```
|
|
494
507
|
|
|
495
|
-
####
|
|
508
|
+
#### Responding to a submitted plan
|
|
509
|
+
|
|
510
|
+
The `submit_plan` built-in tool pauses via the native tool-suspension primitive, so it surfaces through the same `tool_suspended` event as other interactive tools. Resume it with [`respondToToolSuspension`](#respondtotoolsuspension-resumedata-toolcallid-requestcontext-), passing a `resumeData` object with `action` (`'approved'` or `'rejected'`) and an optional `feedback` string.
|
|
496
511
|
|
|
497
|
-
|
|
512
|
+
On approval, the Harness automatically switches to its default (execution) mode. On rejection, the plan-mode run resumes so the agent can revise and submit again.
|
|
498
513
|
|
|
499
514
|
```typescript
|
|
500
|
-
harness.
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
515
|
+
harness.respondToToolSuspension({
|
|
516
|
+
toolCallId: event.toolCallId,
|
|
517
|
+
resumeData: { action: 'approved' },
|
|
518
|
+
})
|
|
519
|
+
harness.respondToToolSuspension({
|
|
520
|
+
toolCallId: event.toolCallId,
|
|
521
|
+
resumeData: { action: 'rejected', feedback: 'Needs more detail' },
|
|
504
522
|
})
|
|
505
523
|
```
|
|
506
524
|
|
|
@@ -767,7 +785,7 @@ unsubscribe()
|
|
|
767
785
|
|
|
768
786
|
Returns: `() => void`
|
|
769
787
|
|
|
770
|
-
`subscribeDisplayState()`
|
|
788
|
+
`subscribeDisplayState()` doesn't call the listener immediately. Call [`getDisplayState`](#getdisplaystate) first if the UI needs an initial render before the next harness event.
|
|
771
789
|
|
|
772
790
|
#### `subscribe(listener)`
|
|
773
791
|
|
|
@@ -798,48 +816,46 @@ unsubscribe()
|
|
|
798
816
|
|
|
799
817
|
The harness emits events through registered listeners. The following table lists the available event types:
|
|
800
818
|
|
|
801
|
-
| Event type | Description
|
|
802
|
-
| -------------------------- |
|
|
803
|
-
| `mode_changed` | The active mode changed.
|
|
804
|
-
| `model_changed` | The active model changed.
|
|
805
|
-
| `thread_changed` | The active thread changed.
|
|
806
|
-
| `thread_created` | A new thread was created.
|
|
807
|
-
| `thread_deleted` | A thread was deleted.
|
|
808
|
-
| `state_changed` | Harness state was updated.
|
|
809
|
-
| `agent_start` | The agent started processing.
|
|
810
|
-
| `agent_end` | The agent finished processing.
|
|
811
|
-
| `message_start` | A new message started streaming.
|
|
812
|
-
| `message_update` | A message was updated with new content.
|
|
813
|
-
| `message_end` | A message finished streaming.
|
|
814
|
-
| `tool_start` | A tool call started.
|
|
815
|
-
| `tool_approval_required` | A tool call requires user approval.
|
|
816
|
-
| `
|
|
817
|
-
| `
|
|
818
|
-
| `
|
|
819
|
-
| `
|
|
820
|
-
| `
|
|
821
|
-
| `
|
|
822
|
-
| `
|
|
823
|
-
| `
|
|
824
|
-
| `
|
|
825
|
-
| `
|
|
826
|
-
| `
|
|
827
|
-
| `
|
|
828
|
-
| `
|
|
829
|
-
| `
|
|
830
|
-
| `
|
|
831
|
-
| `
|
|
832
|
-
| `
|
|
833
|
-
| `
|
|
834
|
-
| `
|
|
835
|
-
| `
|
|
836
|
-
| `
|
|
837
|
-
| `
|
|
838
|
-
| `
|
|
839
|
-
| `
|
|
840
|
-
| `
|
|
841
|
-
| `subagent_model_changed` | A subagent's model changed. |
|
|
842
|
-
| `task_updated` | A task list was updated. |
|
|
819
|
+
| Event type | Description |
|
|
820
|
+
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
821
|
+
| `mode_changed` | The active mode changed. |
|
|
822
|
+
| `model_changed` | The active model changed. |
|
|
823
|
+
| `thread_changed` | The active thread changed. |
|
|
824
|
+
| `thread_created` | A new thread was created. |
|
|
825
|
+
| `thread_deleted` | A thread was deleted. |
|
|
826
|
+
| `state_changed` | Harness state was updated. |
|
|
827
|
+
| `agent_start` | The agent started processing. |
|
|
828
|
+
| `agent_end` | The agent finished processing. |
|
|
829
|
+
| `message_start` | A new message started streaming. |
|
|
830
|
+
| `message_update` | A message was updated with new content. |
|
|
831
|
+
| `message_end` | A message finished streaming. |
|
|
832
|
+
| `tool_start` | A tool call started. |
|
|
833
|
+
| `tool_approval_required` | A tool call requires user approval. |
|
|
834
|
+
| `tool_suspended` | A tool paused via the native tool-suspension primitive (for example, `ask_user`, `request_access`, or `submit_plan`). Includes `toolCallId`, `toolName`, and `suspendPayload`. Resume it with [`respondToToolSuspension`](#respondtotoolsuspension-resumedata-toolcallid-requestcontext-). |
|
|
835
|
+
| `tool_update` | A tool call was updated with progress. |
|
|
836
|
+
| `tool_end` | A tool call finished. |
|
|
837
|
+
| `tool_input_start` | Tool input started streaming. |
|
|
838
|
+
| `tool_input_delta` | Tool input received a streaming delta. |
|
|
839
|
+
| `tool_input_end` | Tool input finished streaming. |
|
|
840
|
+
| `usage_update` | Token usage was updated. |
|
|
841
|
+
| `error` | An error occurred. |
|
|
842
|
+
| `info` | An informational message was emitted. |
|
|
843
|
+
| `follow_up_queued` | A follow-up message was queued. |
|
|
844
|
+
| `workspace_status_changed` | The workspace status changed. |
|
|
845
|
+
| `workspace_ready` | The workspace finished initializing. |
|
|
846
|
+
| `workspace_error` | The workspace encountered an error. |
|
|
847
|
+
| `om_status` | An Observational Memory status update was emitted. |
|
|
848
|
+
| `om_observation_start` | An observation started. |
|
|
849
|
+
| `om_observation_end` | An observation completed. |
|
|
850
|
+
| `om_reflection_start` | A reflection started. |
|
|
851
|
+
| `om_reflection_end` | A reflection completed. |
|
|
852
|
+
| `subagent_start` | A subagent started processing. |
|
|
853
|
+
| `subagent_text_delta` | A subagent emitted a text delta. |
|
|
854
|
+
| `subagent_tool_start` | A subagent started a tool call. |
|
|
855
|
+
| `subagent_tool_end` | A subagent finished a tool call. |
|
|
856
|
+
| `subagent_end` | A subagent finished processing. |
|
|
857
|
+
| `subagent_model_changed` | A subagent's model changed. |
|
|
858
|
+
| `task_updated` | A task list was updated. |
|
|
843
859
|
|
|
844
860
|
## Built-in tools
|
|
845
861
|
|
|
@@ -859,15 +875,18 @@ The harness provides built-in tools to agents in every mode:
|
|
|
859
875
|
|
|
860
876
|
The `ask_user` tool accepts `options` for choice prompts. Set `selectionMode` to `single_select` to let the user pick one option, or `multi_select` to let the user pick multiple options. When `options` are provided and `selectionMode` is omitted, the prompt defaults to `single_select`. Omit `options` for free-text questions.
|
|
861
877
|
|
|
862
|
-
The following example demonstrates a multi-select response handler. The UI reads `event.
|
|
878
|
+
The following example demonstrates a multi-select response handler. When the tool pauses, the harness emits a `tool_suspended` event. The UI reads `selectionMode` from `event.suspendPayload`, lets the user choose multiple options, and then returns a string array with `respondToToolSuspension()`.
|
|
863
879
|
|
|
864
880
|
```typescript
|
|
865
881
|
harness.subscribe(event => {
|
|
866
|
-
if (event.type === '
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
882
|
+
if (event.type === 'tool_suspended' && event.toolName === 'ask_user') {
|
|
883
|
+
const { selectionMode } = event.suspendPayload as { selectionMode?: string }
|
|
884
|
+
if (selectionMode === 'multi_select') {
|
|
885
|
+
harness.respondToToolSuspension({
|
|
886
|
+
toolCallId: event.toolCallId,
|
|
887
|
+
resumeData: ['Add tests', 'Update docs'],
|
|
888
|
+
})
|
|
889
|
+
}
|
|
871
890
|
}
|
|
872
891
|
})
|
|
873
892
|
```
|