@mastra/mcp-docs-server 1.0.0-beta.5 → 1.0.0-beta.6
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +67 -67
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +53 -53
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fconvex.md +29 -0
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +274 -274
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +65 -65
- package/.docs/organized/changelogs/%40mastra%2Fduckdb.md +42 -0
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Felasticsearch.md +52 -0
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Flance.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +24 -24
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +84 -84
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +36 -36
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +47 -47
- package/.docs/organized/changelogs/%40mastra%2Frag.md +43 -43
- package/.docs/organized/changelogs/%40mastra%2Freact.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +56 -56
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +19 -19
- package/.docs/organized/changelogs/create-mastra.md +9 -9
- package/.docs/organized/changelogs/mastra.md +17 -17
- package/.docs/organized/code-examples/agui.md +1 -0
- package/.docs/organized/code-examples/ai-sdk-v5.md +1 -0
- package/.docs/organized/code-examples/mcp-server-adapters.md +721 -0
- package/.docs/organized/code-examples/server-app-access.md +342 -0
- package/.docs/raw/agents/agent-approval.mdx +189 -0
- package/.docs/raw/agents/guardrails.mdx +13 -9
- package/.docs/raw/agents/networks.mdx +1 -0
- package/.docs/raw/agents/overview.mdx +23 -58
- package/.docs/raw/agents/processors.mdx +279 -0
- package/.docs/raw/deployment/cloud-providers/index.mdx +19 -26
- package/.docs/raw/deployment/cloud-providers/netlify-deployer.mdx +44 -13
- package/.docs/raw/evals/running-in-ci.mdx +0 -2
- package/.docs/raw/{guides/getting-started → getting-started}/manual-install.mdx +2 -2
- package/.docs/raw/getting-started/start.mdx +1 -1
- package/.docs/raw/guides/build-your-ui/ai-sdk-ui.mdx +8 -0
- package/.docs/raw/guides/getting-started/quickstart.mdx +1 -1
- package/.docs/raw/guides/guide/whatsapp-chat-bot.mdx +421 -0
- package/.docs/raw/guides/index.mdx +3 -35
- package/.docs/raw/guides/migrations/upgrade-to-v1/agent.mdx +11 -0
- package/.docs/raw/guides/migrations/upgrade-to-v1/workflows.mdx +29 -0
- package/.docs/raw/index.mdx +1 -1
- package/.docs/raw/memory/memory-processors.mdx +265 -79
- package/.docs/raw/memory/working-memory.mdx +10 -2
- package/.docs/raw/observability/overview.mdx +0 -1
- package/.docs/raw/observability/tracing/bridges/otel.mdx +176 -0
- package/.docs/raw/observability/tracing/exporters/arize.mdx +17 -0
- package/.docs/raw/observability/tracing/exporters/braintrust.mdx +19 -0
- package/.docs/raw/observability/tracing/exporters/langfuse.mdx +20 -0
- package/.docs/raw/observability/tracing/exporters/langsmith.mdx +12 -0
- package/.docs/raw/observability/tracing/exporters/otel.mdx +5 -4
- package/.docs/raw/observability/tracing/overview.mdx +71 -6
- package/.docs/raw/observability/tracing/processors/sensitive-data-filter.mdx +0 -1
- package/.docs/raw/rag/retrieval.mdx +23 -6
- package/.docs/raw/rag/vector-databases.mdx +93 -2
- package/.docs/raw/reference/agents/generate.mdx +55 -6
- package/.docs/raw/reference/agents/network.mdx +44 -0
- package/.docs/raw/reference/client-js/memory.mdx +43 -0
- package/.docs/raw/reference/client-js/workflows.mdx +92 -63
- package/.docs/raw/reference/deployer/netlify.mdx +1 -2
- package/.docs/raw/reference/evals/scorer-utils.mdx +362 -0
- package/.docs/raw/reference/index.mdx +1 -0
- package/.docs/raw/reference/observability/tracing/bridges/otel.mdx +150 -0
- package/.docs/raw/reference/observability/tracing/configuration.mdx +0 -4
- package/.docs/raw/reference/observability/tracing/exporters/arize.mdx +4 -0
- package/.docs/raw/reference/observability/tracing/exporters/langsmith.mdx +17 -1
- package/.docs/raw/reference/observability/tracing/exporters/otel.mdx +6 -0
- package/.docs/raw/reference/observability/tracing/instances.mdx +0 -4
- package/.docs/raw/reference/observability/tracing/interfaces.mdx +29 -4
- package/.docs/raw/reference/observability/tracing/spans.mdx +0 -4
- package/.docs/raw/reference/processors/language-detector.mdx +9 -2
- package/.docs/raw/reference/processors/message-history-processor.mdx +131 -0
- package/.docs/raw/reference/processors/moderation-processor.mdx +10 -3
- package/.docs/raw/reference/processors/pii-detector.mdx +10 -3
- package/.docs/raw/reference/processors/processor-interface.mdx +502 -0
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +9 -2
- package/.docs/raw/reference/processors/semantic-recall-processor.mdx +197 -0
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +2 -2
- package/.docs/raw/reference/processors/tool-call-filter.mdx +125 -0
- package/.docs/raw/reference/processors/working-memory-processor.mdx +221 -0
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +37 -0
- package/.docs/raw/reference/storage/convex.mdx +164 -0
- package/.docs/raw/reference/storage/lance.mdx +33 -0
- package/.docs/raw/reference/storage/libsql.mdx +37 -0
- package/.docs/raw/reference/storage/mongodb.mdx +39 -0
- package/.docs/raw/reference/storage/mssql.mdx +37 -0
- package/.docs/raw/reference/storage/postgresql.mdx +37 -0
- package/.docs/raw/reference/streaming/ChunkType.mdx +1 -1
- package/.docs/raw/reference/streaming/agents/stream.mdx +56 -1
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +7 -9
- package/.docs/raw/reference/streaming/workflows/{resumeStreamVNext.mdx → resumeStream.mdx} +51 -11
- package/.docs/raw/reference/streaming/workflows/stream.mdx +83 -24
- package/.docs/raw/reference/tools/mcp-client.mdx +74 -17
- package/.docs/raw/reference/vectors/convex.mdx +429 -0
- package/.docs/raw/reference/vectors/duckdb.mdx +462 -0
- package/.docs/raw/reference/vectors/elasticsearch.mdx +310 -0
- package/.docs/raw/reference/voice/google.mdx +159 -20
- package/.docs/raw/reference/workflows/run-methods/restart.mdx +142 -0
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +44 -0
- package/.docs/raw/reference/workflows/run-methods/start.mdx +44 -0
- package/.docs/raw/reference/workflows/run.mdx +13 -5
- package/.docs/raw/reference/workflows/step.mdx +13 -0
- package/.docs/raw/reference/workflows/workflow.mdx +19 -0
- package/.docs/raw/server-db/mastra-server.mdx +30 -1
- package/.docs/raw/server-db/request-context.mdx +0 -1
- package/.docs/raw/server-db/storage.mdx +11 -0
- package/.docs/raw/streaming/overview.mdx +6 -6
- package/.docs/raw/streaming/tool-streaming.mdx +2 -2
- package/.docs/raw/streaming/workflow-streaming.mdx +5 -11
- package/.docs/raw/workflows/error-handling.mdx +1 -0
- package/.docs/raw/workflows/human-in-the-loop.mdx +4 -4
- package/.docs/raw/workflows/overview.mdx +56 -44
- package/.docs/raw/workflows/snapshots.mdx +1 -0
- package/.docs/raw/workflows/suspend-and-resume.mdx +85 -16
- package/.docs/raw/workflows/time-travel.mdx +313 -0
- package/.docs/raw/workflows/workflow-state.mdx +191 -0
- package/CHANGELOG.md +8 -0
- package/package.json +4 -4
- package/.docs/raw/agents/human-in-the-loop-with-tools.mdx +0 -91
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +0 -47
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +0 -153
|
@@ -17,7 +17,7 @@ const workflows = await mastraClient.listWorkflows();
|
|
|
17
17
|
|
|
18
18
|
## Working with a Specific Workflow
|
|
19
19
|
|
|
20
|
-
Get an instance of a specific workflow
|
|
20
|
+
Get an instance of a specific workflow by its ID:
|
|
21
21
|
|
|
22
22
|
```typescript title="src/mastra/workflows/test-workflow.ts"
|
|
23
23
|
export const testWorkflow = createWorkflow({
|
|
@@ -26,12 +26,12 @@ export const testWorkflow = createWorkflow({
|
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
```typescript
|
|
29
|
-
const workflow = mastraClient.getWorkflow("
|
|
29
|
+
const workflow = mastraClient.getWorkflow("city-workflow");
|
|
30
30
|
```
|
|
31
31
|
|
|
32
32
|
## Workflow Methods
|
|
33
33
|
|
|
34
|
-
###
|
|
34
|
+
### details()
|
|
35
35
|
|
|
36
36
|
Retrieve detailed information about a workflow:
|
|
37
37
|
|
|
@@ -39,109 +39,138 @@ Retrieve detailed information about a workflow:
|
|
|
39
39
|
const details = await workflow.details();
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
###
|
|
42
|
+
### createRun()
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
Create a new workflow run instance:
|
|
45
45
|
|
|
46
46
|
```typescript
|
|
47
47
|
const run = await workflow.createRun();
|
|
48
48
|
|
|
49
|
+
// Alternatively (instead of the call above), reuse an existing runId
|
|
50
|
+
const run = await workflow.createRun({ runId: "existing-run-id" });
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### startAsync()
|
|
54
|
+
|
|
55
|
+
Start a workflow run and await the full result:
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
const run = await workflow.createRun();
|
|
59
|
+
|
|
60
|
+
const result = await run.startAsync({
|
|
61
|
+
inputData: {
|
|
62
|
+
city: "New York",
|
|
63
|
+
},
|
|
64
|
+
});
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
You can also pass `initialState` to set the starting values for the workflow's state:
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
49
70
|
const result = await run.startAsync({
|
|
50
71
|
inputData: {
|
|
51
72
|
city: "New York",
|
|
52
73
|
},
|
|
74
|
+
initialState: {
|
|
75
|
+
count: 0,
|
|
76
|
+
items: [],
|
|
77
|
+
},
|
|
53
78
|
});
|
|
54
79
|
```
|
|
55
80
|
|
|
56
|
-
|
|
81
|
+
The `initialState` object should match the structure defined in the workflow's `stateSchema`. See [Workflow State](/docs/v1/workflows/workflow-state) for more details.
|
|
57
82
|
|
|
58
|
-
|
|
83
|
+
### start()
|
|
84
|
+
|
|
85
|
+
Start a workflow run without waiting for completion:
|
|
59
86
|
|
|
60
87
|
```typescript
|
|
61
88
|
const run = await workflow.createRun();
|
|
62
89
|
|
|
90
|
+
await run.start({
|
|
91
|
+
inputData: {
|
|
92
|
+
city: "New York",
|
|
93
|
+
},
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
// Poll for results later
|
|
97
|
+
const result = await workflow.runExecutionResult(run.runId);
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
This is useful for long-running workflows where you want to start execution and check results later.
|
|
101
|
+
|
|
102
|
+
### resumeAsync()
|
|
103
|
+
|
|
104
|
+
Resume a suspended workflow step and await the full result:
|
|
105
|
+
|
|
106
|
+
```typescript
|
|
107
|
+
const run = await workflow.createRun({ runId: prevRunId });
|
|
108
|
+
|
|
63
109
|
const result = await run.resumeAsync({
|
|
64
110
|
step: "step-id",
|
|
65
111
|
resumeData: { key: "value" },
|
|
66
112
|
});
|
|
67
113
|
```
|
|
68
114
|
|
|
69
|
-
###
|
|
115
|
+
### resume()
|
|
70
116
|
|
|
71
|
-
Resume workflow
|
|
117
|
+
Resume a suspended workflow step without waiting for completion:
|
|
72
118
|
|
|
73
119
|
```typescript
|
|
74
|
-
|
|
75
|
-
const workflow = mastraClient.getWorkflow("testWorkflow");
|
|
120
|
+
const run = await workflow.createRun({ runId: prevRunId });
|
|
76
121
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
resumeData: { key: "value" },
|
|
82
|
-
});
|
|
83
|
-
} catch (e) {
|
|
84
|
-
console.error(e);
|
|
85
|
-
}
|
|
122
|
+
await run.resume({
|
|
123
|
+
step: "step-id",
|
|
124
|
+
resumeData: { key: "value" },
|
|
125
|
+
});
|
|
86
126
|
```
|
|
87
127
|
|
|
88
|
-
###
|
|
128
|
+
### stream()
|
|
89
129
|
|
|
90
130
|
Stream workflow execution for real-time updates:
|
|
91
131
|
|
|
92
132
|
```typescript
|
|
93
|
-
|
|
94
|
-
const workflow = mastraClient.getWorkflow("testWorkflow");
|
|
95
|
-
|
|
96
|
-
const run = await workflow.createRun();
|
|
133
|
+
const run = await workflow.createRun();
|
|
97
134
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
135
|
+
const stream = await run.stream({
|
|
136
|
+
inputData: {
|
|
137
|
+
city: "New York",
|
|
138
|
+
},
|
|
139
|
+
});
|
|
103
140
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
}
|
|
107
|
-
} catch (e) {
|
|
108
|
-
console.error("Workflow error:", e);
|
|
141
|
+
for await (const chunk of stream) {
|
|
142
|
+
console.log(JSON.stringify(chunk, null, 2));
|
|
109
143
|
}
|
|
110
144
|
```
|
|
111
145
|
|
|
112
|
-
###
|
|
146
|
+
### runExecutionResult()
|
|
113
147
|
|
|
114
|
-
Get the result
|
|
148
|
+
Get the execution result for a workflow run:
|
|
115
149
|
|
|
116
150
|
```typescript
|
|
117
|
-
|
|
118
|
-
const workflow = mastraClient.getWorkflow("testWorkflow");
|
|
119
|
-
|
|
120
|
-
const run = await workflow.createRun();
|
|
121
|
-
|
|
122
|
-
// start the workflow run
|
|
123
|
-
const startResult = await run.start({
|
|
124
|
-
inputData: {
|
|
125
|
-
city: "New York",
|
|
126
|
-
},
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
const result = await workflow.runExecutionResult(run.runId);
|
|
130
|
-
|
|
131
|
-
console.log(result);
|
|
132
|
-
} catch (e) {
|
|
133
|
-
console.error(e);
|
|
134
|
-
}
|
|
151
|
+
const result = await workflow.runExecutionResult(runId);
|
|
135
152
|
```
|
|
136
153
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
### Workflow run result
|
|
154
|
+
<h3>Run result format</h3>
|
|
140
155
|
|
|
141
156
|
A workflow run result yields the following:
|
|
142
157
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
158
|
+
<PropertiesTable
|
|
159
|
+
content={[
|
|
160
|
+
{
|
|
161
|
+
name: "runId",
|
|
162
|
+
type: "string",
|
|
163
|
+
description: "Unique identifier for this workflow run instance",
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
name: "eventTimestamp",
|
|
167
|
+
type: "Date",
|
|
168
|
+
description: "The timestamp of the event",
|
|
169
|
+
},
|
|
170
|
+
{
|
|
171
|
+
name: "payload",
|
|
172
|
+
type: "object",
|
|
173
|
+
description: "Contains currentStep (id, status, output, payload) and workflowState (status, steps record)",
|
|
174
|
+
},
|
|
175
|
+
]}
|
|
176
|
+
/>
|
|
@@ -9,12 +9,11 @@ The `NetlifyDeployer` class handles deployment of standalone Mastra applications
|
|
|
9
9
|
|
|
10
10
|
## Usage example
|
|
11
11
|
|
|
12
|
-
```typescript title="src/mastra/index.ts"
|
|
12
|
+
```typescript title="src/mastra/index.ts" copy
|
|
13
13
|
import { Mastra } from "@mastra/core";
|
|
14
14
|
import { NetlifyDeployer } from "@mastra/deployer-netlify";
|
|
15
15
|
|
|
16
16
|
export const mastra = new Mastra({
|
|
17
|
-
// ...
|
|
18
17
|
deployer: new NetlifyDeployer(),
|
|
19
18
|
});
|
|
20
19
|
```
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Scorer Utils | Evals"
|
|
3
|
+
description: Utility functions for extracting data from scorer run inputs and outputs, including text content, reasoning, system messages, and tool calls.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Scorer Utils
|
|
7
|
+
|
|
8
|
+
Mastra provides utility functions to help extract and process data from scorer run inputs and outputs. These utilities are particularly useful in the `preprocess` step of custom scorers.
|
|
9
|
+
|
|
10
|
+
## Import
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import {
|
|
14
|
+
getAssistantMessageFromRunOutput,
|
|
15
|
+
getReasoningFromRunOutput,
|
|
16
|
+
getUserMessageFromRunInput,
|
|
17
|
+
getSystemMessagesFromRunInput,
|
|
18
|
+
getCombinedSystemPrompt,
|
|
19
|
+
extractToolCalls,
|
|
20
|
+
extractInputMessages,
|
|
21
|
+
extractAgentResponseMessages,
|
|
22
|
+
} from "@mastra/evals/scorers/utils";
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Message Extraction
|
|
26
|
+
|
|
27
|
+
### getAssistantMessageFromRunOutput
|
|
28
|
+
|
|
29
|
+
Extracts the text content from the first assistant message in the run output.
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
const scorer = createScorer({
|
|
33
|
+
id: "my-scorer",
|
|
34
|
+
description: "My scorer",
|
|
35
|
+
type: "agent",
|
|
36
|
+
})
|
|
37
|
+
.preprocess(({ run }) => {
|
|
38
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
39
|
+
return { response };
|
|
40
|
+
})
|
|
41
|
+
.generateScore(({ results }) => {
|
|
42
|
+
return results.preprocessStepResult?.response ? 1 : 0;
|
|
43
|
+
});
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
<PropertiesTable
|
|
47
|
+
content={[
|
|
48
|
+
{
|
|
49
|
+
name: "output",
|
|
50
|
+
type: "ScorerRunOutputForAgent",
|
|
51
|
+
isOptional: true,
|
|
52
|
+
description: "The scorer run output (array of MastraDBMessage)",
|
|
53
|
+
},
|
|
54
|
+
]}
|
|
55
|
+
/>
|
|
56
|
+
|
|
57
|
+
**Returns:** `string | undefined` - The assistant message text, or undefined if no assistant message is found.
|
|
58
|
+
|
|
59
|
+
### getUserMessageFromRunInput
|
|
60
|
+
|
|
61
|
+
Extracts the text content from the first user message in the run input.
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
.preprocess(({ run }) => {
|
|
65
|
+
const userMessage = getUserMessageFromRunInput(run.input);
|
|
66
|
+
return { userMessage };
|
|
67
|
+
})
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
<PropertiesTable
|
|
71
|
+
content={[
|
|
72
|
+
{
|
|
73
|
+
name: "input",
|
|
74
|
+
type: "ScorerRunInputForAgent",
|
|
75
|
+
isOptional: true,
|
|
76
|
+
description: "The scorer run input containing input messages",
|
|
77
|
+
},
|
|
78
|
+
]}
|
|
79
|
+
/>
|
|
80
|
+
|
|
81
|
+
**Returns:** `string | undefined` - The user message text, or undefined if no user message is found.
|
|
82
|
+
|
|
83
|
+
### extractInputMessages
|
|
84
|
+
|
|
85
|
+
Extracts text content from all input messages as an array.
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
.preprocess(({ run }) => {
|
|
89
|
+
const allUserMessages = extractInputMessages(run.input);
|
|
90
|
+
return { conversationHistory: allUserMessages.join("\n") };
|
|
91
|
+
})
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Returns:** `string[]` - Array of text strings from each input message.
|
|
95
|
+
|
|
96
|
+
### extractAgentResponseMessages
|
|
97
|
+
|
|
98
|
+
Extracts text content from all assistant response messages as an array.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
.preprocess(({ run }) => {
|
|
102
|
+
const allResponses = extractAgentResponseMessages(run.output);
|
|
103
|
+
return { allResponses };
|
|
104
|
+
})
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Returns:** `string[]` - Array of text strings from each assistant message.
|
|
108
|
+
|
|
109
|
+
## Reasoning Extraction
|
|
110
|
+
|
|
111
|
+
### getReasoningFromRunOutput
|
|
112
|
+
|
|
113
|
+
Extracts reasoning text from the run output. This is particularly useful when evaluating responses from reasoning models like `deepseek-reasoner` that produce chain-of-thought reasoning.
|
|
114
|
+
|
|
115
|
+
Reasoning can be stored in two places:
|
|
116
|
+
1. `content.reasoning` - a string field on the message content
|
|
117
|
+
2. `content.parts` - as entries with `type: 'reasoning'` that contain `details`
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
import {
|
|
121
|
+
getReasoningFromRunOutput,
|
|
122
|
+
getAssistantMessageFromRunOutput
|
|
123
|
+
} from "@mastra/evals/scorers/utils";
|
|
124
|
+
|
|
125
|
+
const reasoningQualityScorer = createScorer({
|
|
126
|
+
id: "reasoning-quality",
|
|
127
|
+
name: "Reasoning Quality",
|
|
128
|
+
description: "Evaluates the quality of model reasoning",
|
|
129
|
+
type: "agent",
|
|
130
|
+
})
|
|
131
|
+
.preprocess(({ run }) => {
|
|
132
|
+
const reasoning = getReasoningFromRunOutput(run.output);
|
|
133
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
134
|
+
return { reasoning, response };
|
|
135
|
+
})
|
|
136
|
+
.analyze(({ results }) => {
|
|
137
|
+
const { reasoning } = results.preprocessStepResult || {};
|
|
138
|
+
return {
|
|
139
|
+
hasReasoning: !!reasoning,
|
|
140
|
+
reasoningLength: reasoning?.length || 0,
|
|
141
|
+
hasStepByStep: reasoning?.includes("step") || false,
|
|
142
|
+
};
|
|
143
|
+
})
|
|
144
|
+
.generateScore(({ results }) => {
|
|
145
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {};
|
|
146
|
+
if (!hasReasoning) return 0;
|
|
147
|
+
// Score based on reasoning length (normalized to 0-1)
|
|
148
|
+
return Math.min(reasoningLength / 500, 1);
|
|
149
|
+
})
|
|
150
|
+
.generateReason(({ results, score }) => {
|
|
151
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {};
|
|
152
|
+
if (!hasReasoning) {
|
|
153
|
+
return "No reasoning was provided by the model.";
|
|
154
|
+
}
|
|
155
|
+
return `Model provided ${reasoningLength} characters of reasoning. Score: ${score}`;
|
|
156
|
+
});
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
<PropertiesTable
|
|
160
|
+
content={[
|
|
161
|
+
{
|
|
162
|
+
name: "output",
|
|
163
|
+
type: "ScorerRunOutputForAgent",
|
|
164
|
+
isOptional: true,
|
|
165
|
+
description: "The scorer run output (array of MastraDBMessage)",
|
|
166
|
+
},
|
|
167
|
+
]}
|
|
168
|
+
/>
|
|
169
|
+
|
|
170
|
+
**Returns:** `string | undefined` - The reasoning text, or undefined if no reasoning is present.
|
|
171
|
+
|
|
172
|
+
## System Message Extraction
|
|
173
|
+
|
|
174
|
+
### getSystemMessagesFromRunInput
|
|
175
|
+
|
|
176
|
+
Extracts all system messages from the run input, including both standard system messages and tagged system messages (specialized prompts like memory instructions).
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
.preprocess(({ run }) => {
|
|
180
|
+
const systemMessages = getSystemMessagesFromRunInput(run.input);
|
|
181
|
+
return {
|
|
182
|
+
systemPromptCount: systemMessages.length,
|
|
183
|
+
systemPrompts: systemMessages
|
|
184
|
+
};
|
|
185
|
+
})
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Returns:** `string[]` - Array of system message strings.
|
|
189
|
+
|
|
190
|
+
### getCombinedSystemPrompt
|
|
191
|
+
|
|
192
|
+
Combines all system messages into a single prompt string, joined with double newlines.
|
|
193
|
+
|
|
194
|
+
```typescript
|
|
195
|
+
.preprocess(({ run }) => {
|
|
196
|
+
const fullSystemPrompt = getCombinedSystemPrompt(run.input);
|
|
197
|
+
return { fullSystemPrompt };
|
|
198
|
+
})
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Returns:** `string` - Combined system prompt string.
|
|
202
|
+
|
|
203
|
+
## Tool Call Extraction
|
|
204
|
+
|
|
205
|
+
### extractToolCalls
|
|
206
|
+
|
|
207
|
+
Extracts information about all tool calls from the run output, including tool names, call IDs, and their positions in the message array.
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
const toolUsageScorer = createScorer({
|
|
211
|
+
id: "tool-usage",
|
|
212
|
+
description: "Evaluates tool usage patterns",
|
|
213
|
+
type: "agent",
|
|
214
|
+
})
|
|
215
|
+
.preprocess(({ run }) => {
|
|
216
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output);
|
|
217
|
+
return {
|
|
218
|
+
toolsUsed: tools,
|
|
219
|
+
toolCount: tools.length,
|
|
220
|
+
toolDetails: toolCallInfos,
|
|
221
|
+
};
|
|
222
|
+
})
|
|
223
|
+
.generateScore(({ results }) => {
|
|
224
|
+
const { toolCount } = results.preprocessStepResult || {};
|
|
225
|
+
// Score based on appropriate tool usage
|
|
226
|
+
return toolCount > 0 ? 1 : 0;
|
|
227
|
+
});
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
**Returns:**
|
|
231
|
+
|
|
232
|
+
```typescript
|
|
233
|
+
{
|
|
234
|
+
tools: string[]; // Array of tool names
|
|
235
|
+
toolCallInfos: ToolCallInfo[]; // Detailed tool call information
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Where `ToolCallInfo` is:
|
|
240
|
+
|
|
241
|
+
```typescript
|
|
242
|
+
type ToolCallInfo = {
|
|
243
|
+
toolName: string; // Name of the tool
|
|
244
|
+
toolCallId: string; // Unique call identifier
|
|
245
|
+
messageIndex: number; // Index in the output array
|
|
246
|
+
invocationIndex: number; // Index within message's tool invocations
|
|
247
|
+
};
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## Test Utilities
|
|
251
|
+
|
|
252
|
+
These utilities help create test data for scorer development.
|
|
253
|
+
|
|
254
|
+
### createTestMessage
|
|
255
|
+
|
|
256
|
+
Creates a `MastraDBMessage` object for testing purposes.
|
|
257
|
+
|
|
258
|
+
```typescript
|
|
259
|
+
import { createTestMessage } from "@mastra/evals/scorers/utils";
|
|
260
|
+
|
|
261
|
+
const userMessage = createTestMessage({
|
|
262
|
+
content: "What is the weather?",
|
|
263
|
+
role: "user",
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
const assistantMessage = createTestMessage({
|
|
267
|
+
content: "The weather is sunny.",
|
|
268
|
+
role: "assistant",
|
|
269
|
+
toolInvocations: [
|
|
270
|
+
{
|
|
271
|
+
toolCallId: "call-1",
|
|
272
|
+
toolName: "weatherTool",
|
|
273
|
+
args: { location: "London" },
|
|
274
|
+
result: { temp: 20 },
|
|
275
|
+
state: "result",
|
|
276
|
+
},
|
|
277
|
+
],
|
|
278
|
+
});
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### createAgentTestRun
|
|
282
|
+
|
|
283
|
+
Creates a complete test run object for testing scorers.
|
|
284
|
+
|
|
285
|
+
```typescript
|
|
286
|
+
import { createAgentTestRun, createTestMessage } from "@mastra/evals/scorers/utils";
|
|
287
|
+
|
|
288
|
+
const testRun = createAgentTestRun({
|
|
289
|
+
inputMessages: [
|
|
290
|
+
createTestMessage({ content: "Hello", role: "user" }),
|
|
291
|
+
],
|
|
292
|
+
output: [
|
|
293
|
+
createTestMessage({ content: "Hi there!", role: "assistant" }),
|
|
294
|
+
],
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
// Run your scorer with the test data
|
|
298
|
+
const result = await myScorer.run({
|
|
299
|
+
input: testRun.input,
|
|
300
|
+
output: testRun.output,
|
|
301
|
+
});
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Complete Example
|
|
305
|
+
|
|
306
|
+
Here's a complete example showing how to use multiple utilities together:
|
|
307
|
+
|
|
308
|
+
```typescript
|
|
309
|
+
import { createScorer } from "@mastra/core/evals";
|
|
310
|
+
import {
|
|
311
|
+
getAssistantMessageFromRunOutput,
|
|
312
|
+
getReasoningFromRunOutput,
|
|
313
|
+
getUserMessageFromRunInput,
|
|
314
|
+
getCombinedSystemPrompt,
|
|
315
|
+
extractToolCalls,
|
|
316
|
+
} from "@mastra/evals/scorers/utils";
|
|
317
|
+
|
|
318
|
+
const comprehensiveScorer = createScorer({
|
|
319
|
+
id: "comprehensive-analysis",
|
|
320
|
+
name: "Comprehensive Analysis",
|
|
321
|
+
description: "Analyzes all aspects of an agent response",
|
|
322
|
+
type: "agent",
|
|
323
|
+
})
|
|
324
|
+
.preprocess(({ run }) => {
|
|
325
|
+
// Extract all relevant data
|
|
326
|
+
const userMessage = getUserMessageFromRunInput(run.input);
|
|
327
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
328
|
+
const reasoning = getReasoningFromRunOutput(run.output);
|
|
329
|
+
const systemPrompt = getCombinedSystemPrompt(run.input);
|
|
330
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output);
|
|
331
|
+
|
|
332
|
+
return {
|
|
333
|
+
userMessage,
|
|
334
|
+
response,
|
|
335
|
+
reasoning,
|
|
336
|
+
systemPrompt,
|
|
337
|
+
toolsUsed: tools,
|
|
338
|
+
toolCount: tools.length,
|
|
339
|
+
};
|
|
340
|
+
})
|
|
341
|
+
.generateScore(({ results }) => {
|
|
342
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {};
|
|
343
|
+
|
|
344
|
+
let score = 0;
|
|
345
|
+
if (response && response.length > 0) score += 0.4;
|
|
346
|
+
if (reasoning) score += 0.3;
|
|
347
|
+
if (toolCount > 0) score += 0.3;
|
|
348
|
+
|
|
349
|
+
return score;
|
|
350
|
+
})
|
|
351
|
+
.generateReason(({ results, score }) => {
|
|
352
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {};
|
|
353
|
+
|
|
354
|
+
const parts = [];
|
|
355
|
+
if (response) parts.push("provided a response");
|
|
356
|
+
if (reasoning) parts.push("included reasoning");
|
|
357
|
+
if (toolCount > 0) parts.push(`used ${toolCount} tool(s)`);
|
|
358
|
+
|
|
359
|
+
return `Score: ${score}. The agent ${parts.join(", ")}.`;
|
|
360
|
+
});
|
|
361
|
+
```
|
|
362
|
+
|