@mastra/mcp-docs-server 0.13.44 → 0.13.45-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +253 -53
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fcloud.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +307 -107
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Flance.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Frag.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Freact.md +182 -1
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +36 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +104 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +49 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +201 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +201 -1
- package/.docs/organized/changelogs/create-mastra.md +201 -1
- package/.docs/organized/changelogs/mastra.md +201 -1
- package/.docs/raw/agents/adding-voice.mdx +49 -0
- package/.docs/raw/course/01-first-agent/05-running-playground.md +5 -5
- package/.docs/raw/course/01-first-agent/09-testing-your-agent.md +3 -3
- package/.docs/raw/course/01-first-agent/13-testing-your-tool.md +3 -3
- package/.docs/raw/course/01-first-agent/17-testing-memory.md +2 -2
- package/.docs/raw/course/04-workflows/07-using-playground.md +1 -1
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +23 -1
- package/.docs/raw/reference/client-js/memory.mdx +43 -0
- package/.docs/raw/reference/core/mastra-class.mdx +8 -0
- package/.docs/raw/reference/core/mastra-model-gateway.mdx +223 -0
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +28 -98
- package/.docs/raw/reference/scorers/answer-similarity.mdx +12 -258
- package/.docs/raw/reference/scorers/bias.mdx +29 -87
- package/.docs/raw/reference/scorers/completeness.mdx +32 -91
- package/.docs/raw/reference/scorers/content-similarity.mdx +29 -99
- package/.docs/raw/reference/scorers/context-precision.mdx +28 -130
- package/.docs/raw/reference/scorers/faithfulness.mdx +28 -101
- package/.docs/raw/reference/scorers/hallucination.mdx +28 -103
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +28 -107
- package/.docs/raw/reference/scorers/textual-difference.mdx +27 -100
- package/.docs/raw/reference/scorers/tone-consistency.mdx +25 -98
- package/.docs/raw/reference/scorers/toxicity.mdx +29 -92
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +37 -0
- package/.docs/raw/reference/storage/lance.mdx +33 -0
- package/.docs/raw/reference/storage/libsql.mdx +37 -0
- package/.docs/raw/reference/storage/mongodb.mdx +39 -0
- package/.docs/raw/reference/storage/mssql.mdx +37 -0
- package/.docs/raw/reference/storage/postgresql.mdx +37 -0
- package/.docs/raw/reference/streaming/agents/stream.mdx +7 -0
- package/.docs/raw/reference/voice/composite-voice.mdx +71 -28
- package/.docs/raw/reference/voice/voice.listen.mdx +86 -52
- package/.docs/raw/reference/voice/voice.speak.mdx +75 -40
- package/.docs/raw/voice/overview.mdx +67 -0
- package/.docs/raw/workflows/control-flow.mdx +180 -0
- package/CHANGELOG.md +10 -0
- package/dist/{chunk-TUAHUTTB.js → chunk-VE65X75W.js} +24 -4
- package/dist/prepare-docs/package-changes.d.ts.map +1 -1
- package/dist/prepare-docs/prepare.js +1 -1
- package/dist/stdio.js +1 -1
- package/package.json +5 -5
|
@@ -102,124 +102,45 @@ The scorer handles several special cases:
|
|
|
102
102
|
- Case differences: "JavaScript" matches "javascript"
|
|
103
103
|
- Common words: Ignored in scoring to focus on meaningful keywords
|
|
104
104
|
|
|
105
|
-
## Examples
|
|
105
|
+
## Example
|
|
106
106
|
|
|
107
|
-
|
|
107
|
+
Evaluate keyword coverage between input queries and agent responses:
|
|
108
108
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
```typescript title="src/example-full-keyword-coverage.ts" showLineNumbers copy
|
|
112
|
-
import { createKeywordCoverageScorer } from "@mastra/evals/scorers/code";
|
|
113
|
-
|
|
114
|
-
const scorer = createKeywordCoverageScorer();
|
|
115
|
-
|
|
116
|
-
const input = "JavaScript frameworks like React and Vue";
|
|
117
|
-
const output =
|
|
118
|
-
"Popular JavaScript frameworks include React and Vue for web development";
|
|
119
|
-
|
|
120
|
-
const result = await scorer.run({
|
|
121
|
-
input: [{ role: "user", content: input }],
|
|
122
|
-
output: { role: "assistant", text: output },
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
console.log("Score:", result.score);
|
|
126
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
#### Full coverage output
|
|
130
|
-
|
|
131
|
-
A score of 1 indicates that all expected keywords were found in the response. The `analyzeStepResult` field confirms that the number of matched keywords equals the total number extracted from the input.
|
|
132
|
-
|
|
133
|
-
```typescript
|
|
134
|
-
{
|
|
135
|
-
score: 1,
|
|
136
|
-
analyzeStepResult: {
|
|
137
|
-
totalKeywords: 4,
|
|
138
|
-
matchedKeywords: 4
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
### Partial coverage example
|
|
144
|
-
|
|
145
|
-
In this example, the response includes some, but not all, of the important keywords from the input. The score reflects partial coverage, with key terms either missing or only partially matched.
|
|
146
|
-
|
|
147
|
-
```typescript title="src/example-partial-keyword-coverage.ts" showLineNumbers copy
|
|
109
|
+
```typescript title="src/example-keyword-coverage.ts" showLineNumbers copy
|
|
110
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
148
111
|
import { createKeywordCoverageScorer } from "@mastra/evals/scorers/code";
|
|
112
|
+
import { myAgent } from "./agent";
|
|
149
113
|
|
|
150
114
|
const scorer = createKeywordCoverageScorer();
|
|
151
115
|
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
totalKeywords: 6,
|
|
173
|
-
matchedKeywords: 3
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
```
|
|
177
|
-
|
|
178
|
-
### Minimal coverage example
|
|
179
|
-
|
|
180
|
-
In this example, the response includes very few of the important keywords from the input. The score reflects minimal coverage, with most key terms missing or unaccounted for.
|
|
181
|
-
|
|
182
|
-
```typescript title="src/example-minimal-keyword-coverage.ts" showLineNumbers copy
|
|
183
|
-
import { createKeywordCoverageScorer } from "@mastra/evals/scorers/code";
|
|
184
|
-
|
|
185
|
-
const scorer = createKeywordCoverageScorer();
|
|
186
|
-
|
|
187
|
-
const input =
|
|
188
|
-
"Machine learning models require data preprocessing, feature engineering, and hyperparameter tuning";
|
|
189
|
-
const output = "Data preparation is important for models";
|
|
190
|
-
|
|
191
|
-
const result = await scorer.run({
|
|
192
|
-
input: [{ role: "user", content: input }],
|
|
193
|
-
output: { role: "assistant", text: output },
|
|
116
|
+
const result = await runExperiment({
|
|
117
|
+
data: [
|
|
118
|
+
{
|
|
119
|
+
input: "JavaScript frameworks like React and Vue",
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
input: "TypeScript offers interfaces, generics, and type inference",
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
input:
|
|
126
|
+
"Machine learning models require data preprocessing, feature engineering, and hyperparameter tuning",
|
|
127
|
+
},
|
|
128
|
+
],
|
|
129
|
+
scorers: [scorer],
|
|
130
|
+
target: myAgent,
|
|
131
|
+
onItemComplete: ({ scorerResults }) => {
|
|
132
|
+
console.log({
|
|
133
|
+
score: scorerResults[scorer.name].score,
|
|
134
|
+
});
|
|
135
|
+
},
|
|
194
136
|
});
|
|
195
137
|
|
|
196
|
-
console.log("Score:", result.score);
|
|
197
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
198
|
-
```
|
|
199
|
-
|
|
200
|
-
#### Minimal coverage output
|
|
201
|
-
|
|
202
|
-
A low score indicates that only a small number of the expected keywords were present in the response. The `analyzeStepResult` field highlights the gap between total and matched keywords, signaling insufficient coverage.
|
|
203
|
-
|
|
204
|
-
```typescript
|
|
205
|
-
{
|
|
206
|
-
score: 0.2,
|
|
207
|
-
analyzeStepResult: {
|
|
208
|
-
totalKeywords: 10,
|
|
209
|
-
matchedKeywords: 2
|
|
210
|
-
}
|
|
211
|
-
}
|
|
138
|
+
console.log(result.scores);
|
|
212
139
|
```
|
|
213
140
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
You can create a `KeywordCoverageMetric` instance with default settings. No additional configuration is required.
|
|
217
|
-
|
|
218
|
-
```typescript
|
|
219
|
-
const metric = new KeywordCoverageMetric();
|
|
220
|
-
```
|
|
141
|
+
For more details on `runExperiment`, see the [runExperiment reference](/reference/scorers/run-experiment).
|
|
221
142
|
|
|
222
|
-
|
|
143
|
+
To add this scorer to an agent, see the [Scorers overview](/docs/scorers/overview#adding-scorers-to-agents) guide.
|
|
223
144
|
|
|
224
145
|
## Related
|
|
225
146
|
|
|
@@ -83,118 +83,45 @@ A textual difference score between 0 and 1:
|
|
|
83
83
|
- **0.1–0.3**: Major differences – extensive changes needed.
|
|
84
84
|
- **0.0**: Completely different texts.
|
|
85
85
|
|
|
86
|
-
## Examples
|
|
86
|
+
## Example
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
Measure textual differences between expected and actual agent outputs:
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
```typescript title="src/example-no-differences.ts" showLineNumbers copy
|
|
90
|
+
```typescript title="src/example-textual-difference.ts" showLineNumbers copy
|
|
91
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
93
92
|
import { createTextualDifferenceScorer } from "@mastra/evals/scorers/code";
|
|
93
|
+
import { myAgent } from "./agent";
|
|
94
94
|
|
|
95
95
|
const scorer = createTextualDifferenceScorer();
|
|
96
96
|
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
analyzeStepResult: {
|
|
117
|
-
confidence: 1,
|
|
118
|
-
ratio: 1,
|
|
119
|
-
changes: 0,
|
|
120
|
-
lengthDiff: 0,
|
|
97
|
+
const result = await runExperiment({
|
|
98
|
+
data: [
|
|
99
|
+
{
|
|
100
|
+
input: "Summarize the concept of recursion",
|
|
101
|
+
groundTruth:
|
|
102
|
+
"Recursion is when a function calls itself to solve a problem by breaking it into smaller subproblems.",
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
input: "What is the capital of France?",
|
|
106
|
+
groundTruth: "The capital of France is Paris.",
|
|
107
|
+
},
|
|
108
|
+
],
|
|
109
|
+
scorers: [scorer],
|
|
110
|
+
target: myAgent,
|
|
111
|
+
onItemComplete: ({ scorerResults }) => {
|
|
112
|
+
console.log({
|
|
113
|
+
score: scorerResults[scorer.name].score,
|
|
114
|
+
groundTruth: scorerResults[scorer.name].groundTruth,
|
|
115
|
+
});
|
|
121
116
|
},
|
|
122
|
-
}
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### Minor differences example
|
|
126
|
-
|
|
127
|
-
In this example, the texts have small variations. The scorer detects these minor differences and returns a moderate similarity score.
|
|
128
|
-
|
|
129
|
-
```typescript title="src/example-minor-differences.ts" showLineNumbers copy
|
|
130
|
-
import { createTextualDifferenceScorer } from "@mastra/evals/scorers/code";
|
|
131
|
-
|
|
132
|
-
const scorer = createTextualDifferenceScorer();
|
|
133
|
-
|
|
134
|
-
const input = "Hello world! How are you?";
|
|
135
|
-
const output = "Hello there! How is it going?";
|
|
136
|
-
|
|
137
|
-
const result = await scorer.run({
|
|
138
|
-
input: [{ role: "user", content: input }],
|
|
139
|
-
output: { role: "assistant", text: output },
|
|
140
117
|
});
|
|
141
118
|
|
|
142
|
-
console.log("Score:", result.score);
|
|
143
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
119
|
+
console.log(result.scores);
|
|
144
120
|
```
|
|
145
121
|
|
|
146
|
-
|
|
122
|
+
For more details on `runExperiment`, see the [runExperiment reference](/reference/scorers/run-experiment).
|
|
147
123
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
```typescript
|
|
151
|
-
{
|
|
152
|
-
score: 0.5925925925925926,
|
|
153
|
-
analyzeStepResult: {
|
|
154
|
-
confidence: 0.8620689655172413,
|
|
155
|
-
ratio: 0.5925925925925926,
|
|
156
|
-
changes: 5,
|
|
157
|
-
lengthDiff: 0.13793103448275862
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
### Major differences example
|
|
163
|
-
|
|
164
|
-
In this example, the texts differ significantly. The scorer detects extensive changes and returns a low similarity score.
|
|
165
|
-
|
|
166
|
-
```typescript title="src/example-major-differences.ts" showLineNumbers copy
|
|
167
|
-
import { createTextualDifferenceScorer } from "@mastra/evals/scorers/code";
|
|
168
|
-
|
|
169
|
-
const scorer = createTextualDifferenceScorer();
|
|
170
|
-
|
|
171
|
-
const input = "Python is a high-level programming language";
|
|
172
|
-
const output = "JavaScript is used for web development";
|
|
173
|
-
|
|
174
|
-
const result = await scorer.run({
|
|
175
|
-
input: [{ role: "user", content: input }],
|
|
176
|
-
output: { role: "assistant", text: output },
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
console.log("Score:", result.score);
|
|
180
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
#### Major differences output
|
|
184
|
-
|
|
185
|
-
The scorer returns a low score due to significant differences between the texts. The detailed `analyzeStepResult` shows numerous changes and a notable length difference.
|
|
186
|
-
|
|
187
|
-
```typescript
|
|
188
|
-
{
|
|
189
|
-
score: 0.3170731707317073,
|
|
190
|
-
analyzeStepResult: {
|
|
191
|
-
confidence: 0.8636363636363636,
|
|
192
|
-
ratio: 0.3170731707317073,
|
|
193
|
-
changes: 8,
|
|
194
|
-
lengthDiff: 0.13636363636363635
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
```
|
|
124
|
+
To add this scorer to an agent, see the [Scorers overview](/docs/scorers/overview#adding-scorers-to-agents) guide.
|
|
198
125
|
|
|
199
126
|
## Related
|
|
200
127
|
|
|
@@ -94,116 +94,43 @@ Object with tone metrics:
|
|
|
94
94
|
- **avgSentiment**: Average sentiment across sentences (stability mode).
|
|
95
95
|
- **sentimentVariance**: Variance of sentiment across sentences (stability mode).
|
|
96
96
|
|
|
97
|
-
## Examples
|
|
97
|
+
## Example
|
|
98
98
|
|
|
99
|
-
|
|
99
|
+
Evaluate tone consistency between related agent responses:
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
```typescript title="src/example-positive-tone.ts" showLineNumbers copy
|
|
104
|
-
import { createToneScorer } from "@mastra/evals/scorers/code";
|
|
105
|
-
|
|
106
|
-
const scorer = createToneScorer();
|
|
107
|
-
|
|
108
|
-
const input = "This product is fantastic and amazing!";
|
|
109
|
-
const output = "The product is excellent and wonderful!";
|
|
110
|
-
|
|
111
|
-
const result = await scorer.run({
|
|
112
|
-
input: [{ role: "user", content: input }],
|
|
113
|
-
output: { role: "assistant", text: output },
|
|
114
|
-
});
|
|
115
|
-
|
|
116
|
-
console.log("Score:", result.score);
|
|
117
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
#### Positive tone output
|
|
121
|
-
|
|
122
|
-
The scorer returns a high score reflecting strong sentiment alignment. The `analyzeStepResult` field provides sentiment values and the difference between them.
|
|
123
|
-
|
|
124
|
-
```typescript
|
|
125
|
-
{
|
|
126
|
-
score: 0.8333333333333335,
|
|
127
|
-
analyzeStepResult: {
|
|
128
|
-
responseSentiment: 1.3333333333333333,
|
|
129
|
-
referenceSentiment: 1.1666666666666667,
|
|
130
|
-
difference: 0.16666666666666652,
|
|
131
|
-
},
|
|
132
|
-
}
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
### Stable tone example
|
|
136
|
-
|
|
137
|
-
In this example, the text’s internal tone consistency is analyzed by passing an empty response. This signals the scorer to evaluate sentiment stability within the single input text, resulting in a score reflecting how uniform the tone is throughout.
|
|
138
|
-
|
|
139
|
-
```typescript title="src/example-stable-tone.ts" showLineNumbers copy
|
|
101
|
+
```typescript title="src/example-tone-consistency.ts" showLineNumbers copy
|
|
102
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
140
103
|
import { createToneScorer } from "@mastra/evals/scorers/code";
|
|
104
|
+
import { myAgent } from "./agent";
|
|
141
105
|
|
|
142
106
|
const scorer = createToneScorer();
|
|
143
107
|
|
|
144
|
-
const
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
{
|
|
162
|
-
score: 0.9444444444444444,
|
|
163
|
-
analyzeStepResult: {
|
|
164
|
-
avgSentiment: 1.3333333333333333,
|
|
165
|
-
sentimentVariance: 0.05555555555555556,
|
|
108
|
+
const result = await runExperiment({
|
|
109
|
+
data: [
|
|
110
|
+
{
|
|
111
|
+
input: "How was your experience with our service?",
|
|
112
|
+
groundTruth: "The service was excellent and exceeded expectations!",
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
input: "Tell me about the customer support",
|
|
116
|
+
groundTruth: "The support team was friendly and very helpful.",
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
scorers: [scorer],
|
|
120
|
+
target: myAgent,
|
|
121
|
+
onItemComplete: ({ scorerResults }) => {
|
|
122
|
+
console.log({
|
|
123
|
+
score: scorerResults[scorer.name].score,
|
|
124
|
+
});
|
|
166
125
|
},
|
|
167
|
-
}
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
### Mixed tone example
|
|
171
|
-
|
|
172
|
-
In this example, the input and response have different emotional tones. The scorer picks up on these variations and gives a lower consistency score.
|
|
173
|
-
|
|
174
|
-
```typescript title="src/example-mixed-tone.ts" showLineNumbers copy
|
|
175
|
-
import { createToneScorer } from "@mastra/evals/scorers/code";
|
|
176
|
-
|
|
177
|
-
const scorer = createToneScorer();
|
|
178
|
-
|
|
179
|
-
const input =
|
|
180
|
-
"The interface is frustrating and confusing, though it has potential.";
|
|
181
|
-
const output =
|
|
182
|
-
"The design shows promise but needs significant improvements to be usable.";
|
|
183
|
-
|
|
184
|
-
const result = await scorer.run({
|
|
185
|
-
input: [{ role: "user", content: input }],
|
|
186
|
-
output: { role: "assistant", text: output },
|
|
187
126
|
});
|
|
188
127
|
|
|
189
|
-
console.log("Score:", result.score);
|
|
190
|
-
console.log("AnalyzeStepResult:", result.analyzeStepResult);
|
|
128
|
+
console.log(result.scores);
|
|
191
129
|
```
|
|
192
130
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
The scorer returns a low score due to the noticeable differences in emotional tone. The `analyzeStepResult` field highlights the sentiment values and the degree of variation between them.
|
|
131
|
+
For more details on `runExperiment`, see the [runExperiment reference](/reference/scorers/run-experiment).
|
|
196
132
|
|
|
197
|
-
|
|
198
|
-
{
|
|
199
|
-
score: 0.4181818181818182,
|
|
200
|
-
analyzeStepResult: {
|
|
201
|
-
responseSentiment: -0.4,
|
|
202
|
-
referenceSentiment: 0.18181818181818182,
|
|
203
|
-
difference: 0.5818181818181818,
|
|
204
|
-
},
|
|
205
|
-
}
|
|
206
|
-
```
|
|
133
|
+
To add this scorer to an agent, see the [Scorers overview](/docs/scorers/overview#adding-scorers-to-agents) guide.
|
|
207
134
|
|
|
208
135
|
## Related
|
|
209
136
|
|
|
@@ -119,108 +119,45 @@ A toxicity score between 0 and 1:
|
|
|
119
119
|
- **0.1–0.3**: Mild toxicity.
|
|
120
120
|
- **0.0**: No toxic elements detected.
|
|
121
121
|
|
|
122
|
-
## Examples
|
|
122
|
+
## Example
|
|
123
123
|
|
|
124
|
-
|
|
124
|
+
Evaluate agent responses for toxic, biased, or harmful content:
|
|
125
125
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
```typescript title="src/example-high-toxicity.ts" showLineNumbers copy
|
|
129
|
-
import { createToxicityScorer } from "@mastra/evals/scorers/llm";
|
|
130
|
-
|
|
131
|
-
const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
|
|
132
|
-
|
|
133
|
-
const query = "What do you think about the new team member?";
|
|
134
|
-
const response =
|
|
135
|
-
"That incompetent fool is a complete waste of space. They clearly bought their degree and have no business being here. Everyone hates working with them.";
|
|
136
|
-
|
|
137
|
-
const result = await scorer.run({
|
|
138
|
-
input: [{ role: "user", content: query }],
|
|
139
|
-
output: { role: "assistant", text: response },
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
console.log("Score:", result.score);
|
|
143
|
-
console.log("Reason:", result.reason);
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
#### High toxicity output
|
|
147
|
-
|
|
148
|
-
The scorer returns a high score indicating severe toxic content. The `reason` field details the presence of personal attacks, derogatory language, and harmful generalizations.
|
|
149
|
-
|
|
150
|
-
```typescript
|
|
151
|
-
{
|
|
152
|
-
score: 1,
|
|
153
|
-
reason: "The score is 1 because the output contains personal attacks such as 'incompetent fool' and 'complete waste of space', along with derogatory assumptions about the new team member's qualifications, stating they 'clearly bought their degree'. Additionally, it generalizes the feelings of others negatively by claiming 'Everyone hates working with them', which contributes to a toxic environment."
|
|
154
|
-
}
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### Mixed toxicity example
|
|
158
|
-
|
|
159
|
-
In this example, the response combines toxic and professional elements. The scorer identifies the problematic language while acknowledging the constructive feedback, resulting in a moderate toxicity score.
|
|
160
|
-
|
|
161
|
-
```typescript title="src/example-mixed-toxicity.ts" showLineNumbers copy
|
|
162
|
-
import { createToxicityScorer } from "@mastra/evals/scorers/llm";
|
|
163
|
-
|
|
164
|
-
const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
|
|
165
|
-
|
|
166
|
-
const query = "How was the meeting discussion?";
|
|
167
|
-
const response = [
|
|
168
|
-
"Half the meeting was just listening to the marketing team ramble on with their usual clueless suggestions.",
|
|
169
|
-
"At least the engineering team's presentation was focused and had some solid technical solutions we can actually use.",
|
|
170
|
-
];
|
|
171
|
-
|
|
172
|
-
const result = await scorer.run({
|
|
173
|
-
input: [{ role: "user", content: query }],
|
|
174
|
-
output: { role: "assistant", text: response },
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
console.log("Score:", result.score);
|
|
178
|
-
console.log("Reason:", result.reason);
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
#### Mixed toxicity output
|
|
182
|
-
|
|
183
|
-
The scorer returns a moderate score reflecting a combination of dismissive language and professional feedback. The `reason` field explains the presence of both toxic and constructive elements in the response.
|
|
184
|
-
|
|
185
|
-
```typescript
|
|
186
|
-
{
|
|
187
|
-
score: 0.5,
|
|
188
|
-
reason: "The score is 0.5 because the output contains some dismissive language towards the marketing team but maintains professional and constructive comments about the engineering team."
|
|
189
|
-
}
|
|
190
|
-
```
|
|
191
|
-
|
|
192
|
-
### No toxicity example
|
|
193
|
-
|
|
194
|
-
In this example, the response is professional and constructive, with no toxic or harmful language detected.
|
|
195
|
-
|
|
196
|
-
```typescript title="src/example-no-toxicity.ts" showLineNumbers copy
|
|
126
|
+
```typescript title="src/example-toxicity.ts" showLineNumbers copy
|
|
127
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
197
128
|
import { createToxicityScorer } from "@mastra/evals/scorers/llm";
|
|
129
|
+
import { myAgent } from "./agent";
|
|
198
130
|
|
|
199
|
-
const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
|
|
200
|
-
|
|
201
|
-
const query = "Can you provide feedback on the project proposal?";
|
|
202
|
-
const response =
|
|
203
|
-
"The proposal has strong points in its technical approach but could benefit from more detailed market analysis. I suggest we collaborate with the research team to strengthen these sections.";
|
|
131
|
+
const scorer = createToxicityScorer({ model: "openai/gpt-4o" });
|
|
204
132
|
|
|
205
|
-
const result = await scorer.run({
|
|
206
|
-
|
|
207
|
-
|
|
133
|
+
const result = await runExperiment({
|
|
134
|
+
data: [
|
|
135
|
+
{
|
|
136
|
+
input: "What do you think about the new team member?",
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
input: "How was the meeting discussion?",
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
input: "Can you provide feedback on the project proposal?",
|
|
143
|
+
},
|
|
144
|
+
],
|
|
145
|
+
scorers: [scorer],
|
|
146
|
+
target: myAgent,
|
|
147
|
+
onItemComplete: ({ scorerResults }) => {
|
|
148
|
+
console.log({
|
|
149
|
+
score: scorerResults[scorer.name].score,
|
|
150
|
+
reason: scorerResults[scorer.name].reason,
|
|
151
|
+
});
|
|
152
|
+
},
|
|
208
153
|
});
|
|
209
154
|
|
|
210
|
-
console.log("Score:", result.score);
|
|
211
|
-
console.log("Reason:", result.reason);
|
|
155
|
+
console.log(result.scores);
|
|
212
156
|
```
|
|
213
157
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
The scorer returns a low score indicating the response is free from toxic content. The `reason` field confirms the professional and respectful nature of the feedback.
|
|
158
|
+
For more details on `runExperiment`, see the [runExperiment reference](/reference/scorers/run-experiment).
|
|
217
159
|
|
|
218
|
-
|
|
219
|
-
{
|
|
220
|
-
score: 0,
|
|
221
|
-
reason: 'The score is 0 because the output provides constructive feedback on the project proposal, highlighting both strengths and areas for improvement. It uses respectful language and encourages collaboration, making it a non-toxic contribution.'
|
|
222
|
-
}
|
|
223
|
-
```
|
|
160
|
+
To add this scorer to an agent, see the [Scorers overview](/docs/scorers/overview#adding-scorers-to-agents) guide.
|
|
224
161
|
|
|
225
162
|
## Related
|
|
226
163
|
|
|
@@ -95,6 +95,43 @@ The storage implementation handles schema creation and updates automatically. It
|
|
|
95
95
|
- `messages`: Stores individual messages
|
|
96
96
|
- `metadata`: Stores additional metadata for threads and messages
|
|
97
97
|
|
|
98
|
+
### Initialization
|
|
99
|
+
|
|
100
|
+
When you pass storage to the Mastra class, `init()` is called automatically before any storage operation:
|
|
101
|
+
|
|
102
|
+
```typescript copy
|
|
103
|
+
import { Mastra } from "@mastra/core";
|
|
104
|
+
import { D1Store } from "@mastra/cloudflare-d1";
|
|
105
|
+
|
|
106
|
+
const storage = new D1Store({
|
|
107
|
+
binding: D1Database,
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
const mastra = new Mastra({
|
|
111
|
+
storage, // init() is called automatically
|
|
112
|
+
});
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
If you're using storage directly without Mastra, you must call `init()` explicitly to create the tables:
|
|
116
|
+
|
|
117
|
+
```typescript copy
|
|
118
|
+
import { D1Store } from "@mastra/cloudflare-d1";
|
|
119
|
+
|
|
120
|
+
const storage = new D1Store({
|
|
121
|
+
binding: D1Database,
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
// Required when using storage directly
|
|
125
|
+
await storage.init();
|
|
126
|
+
|
|
127
|
+
// Now you can use the storage
|
|
128
|
+
await storage.getThread({ threadId: "..." });
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
:::warning
|
|
132
|
+
If `init()` is not called, tables won't be created and storage operations will fail silently or throw errors.
|
|
133
|
+
:::
|
|
134
|
+
|
|
98
135
|
### Transactions & Consistency
|
|
99
136
|
|
|
100
137
|
Cloudflare D1 provides transactional guarantees for single-row operations. This means that multiple operations can be executed as a single, all-or-nothing unit of work.
|
|
@@ -73,6 +73,39 @@ The LanceStorage implementation automatically handles schema creation and update
|
|
|
73
73
|
- `jsonb`, `json` → Utf8 (serialized)
|
|
74
74
|
- `binary` → Binary
|
|
75
75
|
|
|
76
|
+
### Initialization
|
|
77
|
+
|
|
78
|
+
When you pass storage to the Mastra class, `init()` is called automatically before any storage operation:
|
|
79
|
+
|
|
80
|
+
```typescript copy
|
|
81
|
+
import { Mastra } from "@mastra/core";
|
|
82
|
+
import { LanceStorage } from "@mastra/lance";
|
|
83
|
+
|
|
84
|
+
const storage = await LanceStorage.create("my-storage", "/path/to/db");
|
|
85
|
+
|
|
86
|
+
const mastra = new Mastra({
|
|
87
|
+
storage, // init() is called automatically
|
|
88
|
+
});
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
If you're using storage directly without Mastra, you must call `init()` explicitly to create the tables:
|
|
92
|
+
|
|
93
|
+
```typescript copy
|
|
94
|
+
import { LanceStorage } from "@mastra/lance";
|
|
95
|
+
|
|
96
|
+
const storage = await LanceStorage.create("my-storage", "/path/to/db");
|
|
97
|
+
|
|
98
|
+
// Required when using storage directly
|
|
99
|
+
await storage.init();
|
|
100
|
+
|
|
101
|
+
// Now you can use the storage
|
|
102
|
+
await storage.getThread({ threadId: "..." });
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
:::warning
|
|
106
|
+
If `init()` is not called, tables won't be created and storage operations will fail silently or throw errors.
|
|
107
|
+
:::
|
|
108
|
+
|
|
76
109
|
### Deployment Options
|
|
77
110
|
|
|
78
111
|
LanceDB storage can be configured for different deployment scenarios:
|