@mastra/mcp-docs-server 0.13.31 → 0.13.32-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +1 -0
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fcloud.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +122 -122
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +19 -19
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +19 -19
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +19 -19
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +31 -31
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +19 -19
- package/.docs/organized/changelogs/%40mastra%2Flance.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +14 -14
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Frag.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Freact.md +20 -0
- package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +37 -37
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +13 -13
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +19 -19
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +10 -10
- package/.docs/organized/changelogs/create-mastra.md +11 -11
- package/.docs/organized/changelogs/mastra.md +26 -26
- package/.docs/organized/code-examples/agent.md +55 -1
- package/.docs/organized/code-examples/agui.md +2 -2
- package/.docs/organized/code-examples/ai-elements.md +2 -2
- package/.docs/organized/code-examples/ai-sdk-useChat.md +2 -2
- package/.docs/organized/code-examples/ai-sdk-v5.md +2 -2
- package/.docs/organized/code-examples/assistant-ui.md +2 -2
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +2 -2
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +2 -2
- package/.docs/organized/code-examples/client-side-tools.md +2 -2
- package/.docs/organized/code-examples/crypto-chatbot.md +2 -2
- package/.docs/organized/code-examples/heads-up-game.md +2 -2
- package/.docs/organized/code-examples/openapi-spec-writer.md +2 -2
- package/.docs/raw/agents/agent-memory.mdx +48 -31
- package/.docs/raw/agents/guardrails.mdx +8 -1
- package/.docs/raw/agents/networks.mdx +197 -128
- package/.docs/raw/agents/overview.mdx +10 -9
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +92 -1
- package/.docs/raw/getting-started/installation.mdx +61 -68
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/semantic-recall.mdx +36 -10
- package/.docs/raw/observability/ai-tracing/overview.mdx +220 -0
- package/.docs/raw/rag/chunking-and-embedding.mdx +19 -7
- package/.docs/raw/reference/cli/create-mastra.mdx +1 -1
- package/.docs/raw/reference/cli/mastra.mdx +1 -1
- package/.docs/raw/reference/client-js/agents.mdx +44 -25
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +3 -6
- package/.docs/raw/reference/scorers/answer-similarity.mdx +7 -13
- package/.docs/raw/reference/scorers/bias.mdx +3 -6
- package/.docs/raw/reference/scorers/completeness.mdx +3 -6
- package/.docs/raw/reference/scorers/context-precision.mdx +6 -9
- package/.docs/raw/reference/scorers/context-relevance.mdx +12 -18
- package/.docs/raw/reference/scorers/faithfulness.mdx +3 -6
- package/.docs/raw/reference/scorers/hallucination.mdx +3 -6
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +13 -23
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +16 -20
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +4 -5
- package/.docs/raw/reference/scorers/toxicity.mdx +3 -6
- package/.docs/raw/reference/workflows/step.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +23 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +22 -4
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +14 -4
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +18 -1
- package/.docs/raw/server-db/runtime-context.mdx +13 -3
- package/.docs/raw/streaming/tool-streaming.mdx +30 -0
- package/.docs/raw/tools-mcp/overview.mdx +1 -1
- package/.docs/raw/workflows/overview.mdx +1 -1
- package/.docs/raw/workflows/suspend-and-resume.mdx +34 -23
- package/CHANGELOG.md +15 -0
- package/package.json +5 -5
- package/.docs/raw/workflows/pausing-execution.mdx +0 -142
@@ -73,28 +73,40 @@ We go deeper into chunking strategies in our [chunk documentation](/reference/ra
 
 ## Step 2: Embedding Generation
 
-Transform chunks into embeddings using your preferred provider. Mastra supports
+Transform chunks into embeddings using your preferred provider. Mastra supports embedding models through the model router or AI SDK packages.
 
-### Using
+### Using the Model Router (Recommended)
+
+The simplest way is to use Mastra's model router with `provider/model` strings:
 
 ```ts showLineNumbers copy
-import {
+import { ModelRouterEmbeddingModel } from "@mastra/core";
 import { embedMany } from "ai";
 
+const embeddingModel = new ModelRouterEmbeddingModel("openai/text-embedding-3-small");
+
 const { embeddings } = await embedMany({
-  model:
+  model: embeddingModel,
   values: chunks.map((chunk) => chunk.text),
 });
 ```
 
-
+Supported embedding models:
+- **OpenAI**: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`
+- **Google**: `gemini-embedding-001`, `text-embedding-004`
+
+The model router automatically handles API key detection from environment variables.
+
+### Using AI SDK Packages
+
+You can also use AI SDK embedding models directly:
 
 ```ts showLineNumbers copy
-import {
+import { openai } from "@ai-sdk/openai";
 import { embedMany } from "ai";
 
 const { embeddings } = await embedMany({
-  model:
+  model: openai.embedding("text-embedding-3-small"),
   values: chunks.map((chunk) => chunk.text),
 });
 ```
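Read together, the two hunks above swap a provider-specific embedding model for the model-router string form. As a rough end-to-end sketch of the documented flow (assuming `@mastra/rag`'s `MDocument` for Step 1 and an `OPENAI_API_KEY` in the environment; the chunking options are illustrative):

```ts
import { MDocument } from "@mastra/rag";
import { ModelRouterEmbeddingModel } from "@mastra/core";
import { embedMany } from "ai";

// Step 1: chunk a document (options here are illustrative).
const doc = MDocument.fromText("Your source text...");
const chunks = await doc.chunk({ strategy: "recursive", size: 512, overlap: 50 });

// Step 2: embed the chunks. The model router resolves the
// "provider/model" string and reads OPENAI_API_KEY from the environment.
const { embeddings } = await embedMany({
  model: new ModelRouterEmbeddingModel("openai/text-embedding-3-small"),
  values: chunks.map((chunk) => chunk.text),
});
```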
@@ -96,7 +96,7 @@ Instead of an interactive prompt you can also define these CLI flags.
     name: "--components",
     type: "string",
     description:
-      "Comma-separated list of components (agents, tools, workflows)",
+      "Comma-separated list of components (agents, tools, workflows, scorers)",
     isOptional: true,
   },
   {
@@ -173,7 +173,7 @@ The directory where Mastra files should be saved to. Defaults to `src`.
 
 #### `--components`
 
-Comma-separated list of components to add. For each component a new folder will be created. Defaults to `['agents', 'tools', 'workflows']`.
+Comma-separated list of components to add. For each component a new folder will be created. Choose from: `"agents" | "tools" | "workflows" | "scorers"`. Defaults to `['agents', 'tools', 'workflows']`.
 
 #### `--llm`
 
@@ -67,27 +67,11 @@ const response = await agent.stream({
 
 // Process data stream with the processDataStream util
 response.processDataStream({
-  onTextPart: (text) => {
-    process.stdout.write(text);
-  },
-  onFilePart: (file) => {
-    console.log(file);
-  },
-  onDataPart: (data) => {
-    console.log(data);
-  },
-  onErrorPart: (error) => {
-    console.error(error);
+  onChunk: async(chunk) => {
+    console.log(chunk);
   },
 });
 
-// Process text stream with the processTextStream util
-// (used with structured output)
-response.processTextStream({
-  onTextPart: text => {
-    process.stdout.write(text);
-  },
-});
 
 // You can also read from response body directly
 const reader = response.body.getReader();
@@ -134,8 +118,13 @@ const response = await agent.stream({
 });
 
 response.processDataStream({
-
-
+  onChunk: async (chunk) => {
+    if (chunk.type === 'text-delta') {
+      console.log(chunk.payload.text);
+    } else if (chunk.type === 'tool-call') {
+      console.log(`calling tool ${chunk.payload.toolName} with args ${JSON.stringify(chunk.payload.args, null, 2)}`);
+    }
+  },
 });
 ```
 
@@ -176,15 +165,45 @@ const response = await agent.stream(
 
 // Process the stream
 response.processDataStream({
-  onChunk: (chunk) => {
-
+  onChunk: async (chunk) => {
+    if (chunk.type === 'text-delta') {
+      console.log(chunk.payload.text);
+    }
   },
 });
 ```
 
-
-
-
+#### AI SDK compatible format
+
+To stream AI SDK-formatted parts on the client from an `agent.stream(...)` response, wrap `response.processDataStream` into a `ReadableStream<ChunkType>` and use `toAISdkFormat`:
+
+```typescript filename="client-ai-sdk-transform.ts" copy
+import { createUIMessageStream } from 'ai';
+import { toAISdkFormat } from '@mastra/ai-sdk';
+import type { ChunkType, MastraModelOutput } from '@mastra/core/stream';
+
+const response = await agent.stream({ messages: 'Tell me a story' });
+
+const chunkStream: ReadableStream<ChunkType> = new ReadableStream<ChunkType>({
+  start(controller) {
+    response.processDataStream({
+      onChunk: async (chunk) => controller.enqueue(chunk as ChunkType),
+    }).finally(() => controller.close());
+  },
+});
+
+const uiMessageStream = createUIMessageStream({
+  execute: async ({ writer }) => {
+    for await (const part of toAISdkFormat(chunkStream as unknown as MastraModelOutput, { from: 'agent' })) {
+      writer.write(part);
+    }
+  },
+});
+
+for await (const part of uiMessageStream) {
+  console.log(part);
+}
+```
 
 ### Generate
 
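The three hunks above replace the per-part callbacks (`onTextPart`, `onFilePart`, `onDataPart`, `onErrorPart`) with a single `onChunk` handler. A minimal client sketch of the new shape, assuming a Mastra server on `localhost:4111` and a hypothetical agent id `weatherAgent`:

```ts
import { MastraClient } from "@mastra/client-js";

const client = new MastraClient({ baseUrl: "http://localhost:4111" });
const agent = client.getAgent("weatherAgent"); // hypothetical agent id

const response = await agent.stream({ messages: "What is the weather in Nairobi?" });

// All stream events arrive through one callback; branch on chunk.type.
await response.processDataStream({
  onChunk: async (chunk) => {
    if (chunk.type === "text-delta") {
      process.stdout.write(chunk.payload.text);
    } else if (chunk.type === "tool-call") {
      console.log(`calling tool ${chunk.payload.toolName}`);
    }
  },
});
```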
@@ -116,10 +116,9 @@ A relevancy score between 0 and 1:
 In this example, the response accurately addresses the input query with specific and relevant information.
 
 ```typescript filename="src/example-high-answer-relevancy.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createAnswerRelevancyScorer({ model: openai
+const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
 
 const inputMessages = [{ role: 'user', content: "What are the health benefits of regular exercise?" }];
 const outputMessage = { text: "Regular exercise improves cardiovascular health, strengthens muscles, boosts metabolism, and enhances mental well-being through the release of endorphins." };
@@ -148,10 +147,9 @@ The output receives a high score because it accurately answers the query without
 In this example, the response addresses the query in part but includes additional information that isn’t directly relevant.
 
 ```typescript filename="src/example-partial-answer-relevancy.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createAnswerRelevancyScorer({ model: openai
+const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
 
 const inputMessages = [{ role: 'user', content: "What should a healthy breakfast include?" }];
 const outputMessage = { text: "A nutritious breakfast should include whole grains and protein. However, the timing of your breakfast is just as important - studies show eating within 2 hours of waking optimizes metabolism and energy levels throughout the day." };
@@ -180,10 +178,9 @@ The output receives a lower score because it partially answers the query. While
 In this example, the response does not address the query and contains information that is entirely unrelated.
 
 ```typescript filename="src/example-low-answer-relevancy.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createAnswerRelevancyScorer({ model: openai
+const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
 
 const inputMessages = [{ role: 'user', content: "What are the benefits of meditation?" }];
 const outputMessage = { text: "The Great Wall of China is over 13,000 miles long and was built during the Ming Dynasty to protect against invasions." };
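All three examples stop at constructing the scorer; for orientation, running one looks roughly like this (a sketch assuming the scorer's `run` method accepts the `inputMessages`/`outputMessage` pair used throughout these docs):

```ts
import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";

const scorer = createAnswerRelevancyScorer({ model: "openai/gpt-4o-mini" });

const inputMessages = [{ role: "user", content: "What are the benefits of meditation?" }];
const outputMessage = { text: "Meditation reduces stress and improves focus." };

// The judge model compares the output to the input and returns a score in [0, 1].
const result = await scorer.run({ input: inputMessages, output: outputMessage });
console.log(result.score, result.reason);
```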
@@ -175,12 +175,11 @@ await runExperiment({
 In this example, the agent's output semantically matches the ground truth perfectly.
 
 ```typescript filename="src/example-perfect-similarity.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
-const scorer = createAnswerSimilarityScorer({ model: openai
+const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
 
 const result = await runExperiment({
   data: [
@@ -214,12 +213,11 @@ The output receives a perfect score because both the agent's answer and ground t
 In this example, the agent provides the same information as the ground truth but with different phrasing.
 
 ```typescript filename="src/example-semantic-similarity.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
-const scorer = createAnswerSimilarityScorer({ model: openai
+const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
 
 const result = await runExperiment({
   data: [
@@ -253,12 +251,11 @@ The output receives a high score because it conveys the same information with eq
 In this example, the agent's response is partially correct but missing key information.
 
 ```typescript filename="src/example-partial-similarity.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
-const scorer = createAnswerSimilarityScorer({ model: openai
+const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
 
 const result = await runExperiment({
   data: [
@@ -292,12 +289,11 @@ The output receives a moderate score because it includes some correct informatio
 In this example, the agent provides factually incorrect information that contradicts the ground truth.
 
 ```typescript filename="src/example-contradiction.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
-const scorer = createAnswerSimilarityScorer({ model: openai
+const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
 
 const result = await runExperiment({
   data: [
@@ -332,13 +328,12 @@ Use the scorer in your test suites to ensure agent consistency over time:
 
 ```typescript filename="src/ci-integration.test.ts" showLineNumbers copy
 import { describe, it, expect } from 'vitest';
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
 describe('Agent Consistency Tests', () => {
-  const scorer = createAnswerSimilarityScorer({ model: openai
+  const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
 
   it('should provide accurate factual answers', async () => {
     const result = await runExperiment({
@@ -386,14 +381,13 @@ describe('Agent Consistency Tests', () => {
 Customize the scorer behavior for specific use cases:
 
 ```typescript filename="src/custom-config.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { runExperiment } from "@mastra/core/scores";
 import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
 import { myAgent } from "./agent";
 
 // Configure for strict exact matching with high scale
 const strictScorer = createAnswerSimilarityScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     exactMatchBonus: 0.5, // Higher bonus for exact matches
     contradictionPenalty: 2.0, // Very strict on contradictions
@@ -404,7 +398,7 @@ const strictScorer = createAnswerSimilarityScorer({
 
 // Configure for lenient semantic matching
 const lenientScorer = createAnswerSimilarityScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     semanticThreshold: 0.6, // Lower threshold for semantic matches
     contradictionPenalty: 0.5, // More forgiving on minor contradictions
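The experiment calls are truncated at `data: [` in several hunks; a filled-in sketch of the documented pattern (assuming `runExperiment` accepts a `target` agent, `data` items with `groundTruth`, and a `scorers` array, and that `./agent` exports `myAgent`):

```ts
import { runExperiment } from "@mastra/core/scores";
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
import { myAgent } from "./agent";

const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });

// Each item pairs an input with the expected answer; the scorer compares
// the agent's output to groundTruth semantically rather than verbatim.
const result = await runExperiment({
  target: myAgent,
  data: [
    { input: "What is the capital of France?", groundTruth: "Paris is the capital of France." },
  ],
  scorers: [scorer],
});

console.log(result.scores);
```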
@@ -127,10 +127,9 @@ A bias score between 0 and 1:
 In this example, the response contains overtly biased language and generalizations. The assumptions are explicit and strongly reinforce stereotypes or discriminatory views.
 
 ```typescript filename="src/example-high-bias.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createBiasScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createBiasScorer({ model: openai
+const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "What makes someone a good leader?";
 const response =
@@ -160,10 +159,9 @@ The output receives a high score because it's a clear example of discriminatory
 In this example, the response contains a mix of reasonable points with subtle bias. While parts of the response are fair, underlying bias is still present.
 
 ```typescript filename="src/example-mixed-bias.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createBiasScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createBiasScorer({ model: openai
+const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "How do different age groups perform at work?";
 const response =
@@ -193,10 +191,9 @@ The output receives a lower score because the response introduces bias in a more
 In this example, the response focuses on objective and neutral criteria without introducing biased assumptions.
 
 ```typescript filename="src/example-low-bias.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createBiasScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createBiasScorer({ model: openai
+const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "What is the best hiring practice?";
 const response =
@@ -110,10 +110,9 @@ A completeness score between 0 and 1:
 In this example, the response comprehensively addresses all aspects of the query with detailed information covering multiple dimensions.
 
 ```typescript filename="src/example-high-completeness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createCompletenessScorer({ model: openai
+const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "Explain the process of photosynthesis, including the inputs, outputs, and stages involved.";
 const response =
@@ -143,10 +142,9 @@ The output receives a high score because it addresses all requested aspects: inp
 In this example, the response addresses some key points but misses important aspects or lacks sufficient detail.
 
 ```typescript filename="src/example-partial-completeness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createCompletenessScorer({ model: openai
+const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "What are the benefits and drawbacks of remote work for both employees and employers?";
 const response =
@@ -176,10 +174,9 @@ The output receives a moderate score because it covers employee benefits and som
 In this example, the response only partially addresses the query and misses several important aspects.
 
 ```typescript filename="src/example-low-completeness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createCompletenessScorer({ model: openai
+const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
 
 const query = "Compare renewable and non-renewable energy sources in terms of cost, environmental impact, and sustainability.";
 const response =
@@ -31,7 +31,7 @@ Use when optimizing context selection for:
   content={[
     {
       name: "model",
-      type: "
+      type: "MastraModelConfig",
       description: "The language model to use for evaluating context relevance",
       required: true,
     },
@@ -146,7 +146,7 @@ MAP = (1.0 + 0.67) / 2 = 0.835 ≈ **0.83**
 
 ```typescript
 const scorer = createContextPrecisionScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     contextExtractor: (input, output) => {
       // Extract context dynamically based on the query
@@ -165,7 +165,7 @@ const scorer = createContextPrecisionScorer({
 
 ```typescript
 const scorer = createContextPrecisionScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       // Simulate retrieved documents from vector database
@@ -187,11 +187,10 @@ const scorer = createContextPrecisionScorer({
 This example shows perfect context precision where all relevant context appears early:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextPrecisionScorer } from '@mastra/evals';
 
 const scorer = createContextPrecisionScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen.',
@@ -234,11 +233,10 @@ console.log(result);
 This example shows moderate precision with both relevant and irrelevant context:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextPrecisionScorer } from '@mastra/evals';
 
 const scorer = createContextPrecisionScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Regular exercise improves cardiovascular health by strengthening the heart muscle.',
@@ -283,11 +281,10 @@ console.log(result);
 This example shows poor context precision with mostly irrelevant context:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextPrecisionScorer } from '@mastra/evals';
 
 const scorer = createContextPrecisionScorer({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'The weather forecast shows sunny skies this weekend.',
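The MAP figure quoted in one hunk header above (`(1.0 + 0.67) / 2 = 0.835 ≈ 0.83`) can be checked by hand; a small standalone sketch of the formula (plain arithmetic, not Mastra API):

```ts
// Average precision over ranked context positions: relevant chunks at
// ranks 1 and 3 give precisions 1/1 and 2/3, whose mean is ~0.83.
const relevant = [true, false, true, false];

let hits = 0;
const precisions: number[] = [];
relevant.forEach((isRelevant, rank0) => {
  if (isRelevant) {
    hits += 1;
    precisions.push(hits / (rank0 + 1)); // precision at this rank
  }
});

const map = precisions.reduce((sum, p) => sum + p, 0) / precisions.length;
console.log(map.toFixed(2)); // "0.83"
```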
@@ -31,7 +31,7 @@ Use when optimizing for:
   content={[
     {
       name: "model",
-      type: "
+      type: "MastraModelConfig",
       description: "The language model to use for evaluating context relevance",
       required: true,
     },
@@ -185,12 +185,11 @@ Use results to improve your system:
 Control how penalties are applied for unused and missing context:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 // Stricter penalty configuration
 const strictScorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Einstein won the Nobel Prize for photoelectric effect',
@@ -208,7 +207,7 @@ const strictScorer = createContextRelevanceScorerLLM({
 
 // Lenient penalty configuration
 const lenientScorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Einstein won the Nobel Prize for photoelectric effect',
@@ -254,7 +253,7 @@ console.log('Lenient penalties:', lenientResult.score); // Higher score, less pe
 
 ```typescript
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o',
   options: {
     contextExtractor: (input, output) => {
       // Extract context based on the query
@@ -278,7 +277,7 @@ const scorer = createContextRelevanceScorerLLM({
 
 ```typescript
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Relevant information...',
@@ -295,7 +294,7 @@ const scorer = createContextRelevanceScorerLLM({
 
 ```typescript
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     contextExtractor: (input, output) => {
       const query = input?.inputMessages?.[0]?.content || '';
@@ -323,11 +322,10 @@ const scorer = createContextRelevanceScorerLLM({
 This example shows excellent context relevance where all context directly supports the response:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1921.',
@@ -370,11 +368,10 @@ console.log(result);
 This example shows moderate relevance with some context being irrelevant or unused:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Solar eclipses occur when the Moon blocks the Sun.',
@@ -415,7 +412,7 @@ console.log(result);
 
 // With custom penalty configuration
 const customScorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'Solar eclipses occur when the Moon blocks the Sun.',
@@ -450,11 +447,10 @@ console.log(customResult);
 This example shows poor context relevance with mostly irrelevant information:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     context: [
       'The Great Barrier Reef is located in Australia.',
@@ -499,11 +495,10 @@ console.log(result);
 Extract context dynamically based on the run input:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     contextExtractor: (input, output) => {
       // Extract query from input
@@ -543,11 +538,10 @@ const scorer = createContextRelevanceScorerLLM({
 Integrate with RAG pipelines to evaluate retrieved context:
 
 ```typescript
-import { openai } from '@ai-sdk/openai';
 import { createContextRelevanceScorerLLM } from '@mastra/evals';
 
 const scorer = createContextRelevanceScorerLLM({
-  model: openai
+  model: 'openai/gpt-4o-mini',
   options: {
     contextExtractor: (input, output) => {
       // Extract from RAG retrieval results
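Several hunks above show `contextExtractor` only in fragments; a self-contained sketch of the documented shape (the `retrieveDocs` helper is hypothetical, standing in for your own retrieval step):

```ts
import { createContextRelevanceScorerLLM } from "@mastra/evals";

// Hypothetical retrieval helper; substitute your vector-store query.
declare function retrieveDocs(query: string): string[];

const scorer = createContextRelevanceScorerLLM({
  model: "openai/gpt-4o-mini",
  options: {
    contextExtractor: (input: any, output: any) => {
      // Derive context from the incoming query, as in the RAG example above.
      const query = input?.inputMessages?.[0]?.content || "";
      return retrieveDocs(query);
    },
  },
});
```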
@@ -121,10 +121,9 @@ A faithfulness score between 0 and 1:
 In this example, the response closely aligns with the context. Each statement in the output is verifiable and supported by the provided context entries, resulting in a high score.
 
 ```typescript filename="src/example-high-faithfulness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createFaithfulnessScorer({ model: openai
+const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "The Tesla Model 3 was launched in 2017.",
     "It has a range of up to 358 miles.",
@@ -159,10 +158,9 @@ The output receives a score of 1 because all the information it provides can be
 In this example, there are a mix of supported and unsupported claims. Some parts of the response are backed by the context, while others introduce new information not found in the source material.
 
 ```typescript filename="src/example-mixed-faithfulness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createFaithfulnessScorer({ model: openai
+const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "Python was created by Guido van Rossum.",
     "The first version was released in 1991.",
@@ -197,10 +195,9 @@ The score is lower because only a portion of the response is verifiable. While s
 In this example, the response directly contradicts the context. None of the claims are supported, and several conflict with the facts provided.
 
 ```typescript filename="src/example-low-faithfulness.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createFaithfulnessScorer({ model: openai
+const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "Mars is the fourth planet from the Sun.",
     "It has a thin atmosphere of mostly carbon dioxide.",
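To see a faithfulness score move, run the scorer against claims that contradict the configured context; a sketch, assuming the same `run({ input, output })` shape as the other scorer examples:

```ts
import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";

const scorer = createFaithfulnessScorer({
  model: "openai/gpt-4o-mini",
  options: {
    context: [
      "Mars is the fourth planet from the Sun.",
      "It has a thin atmosphere of mostly carbon dioxide.",
    ],
  },
});

// Unsupported or contradicted claims pull the score toward 0.
const result = await scorer.run({
  input: [{ role: "user", content: "Tell me about Mars." }],
  output: { text: "Mars is the second planet and has a dense oxygen atmosphere." },
});
console.log(result.score); // expect a low score: both claims contradict the context
```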
@@ -132,10 +132,9 @@ A hallucination score between 0 and 1:
 In this example, the response is fully aligned with the provided context. All claims are factually correct and directly supported by the source material, resulting in a low hallucination score.
 
 ```typescript filename="src/example-no-hallucination.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createHallucinationScorer({ model: openai
+const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "The iPhone was first released in 2007.",
     "Steve Jobs unveiled it at Macworld.",
@@ -170,10 +169,9 @@ The response receives a score of 0 because there are no contradictions. Every st
 In this example, the response includes both accurate and inaccurate claims. Some details align with the context, while others directly contradict it—such as inflated numbers or incorrect locations. These contradictions increase the hallucination score.
 
 ```typescript filename="src/example-mixed-hallucination.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createHallucinationScorer({ model: openai
+const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "The first Star Wars movie was released in 1977.",
     "It was directed by George Lucas.",
@@ -209,10 +207,9 @@ The Scorer assigns a mid-range score because parts of the response conflict with
 In this example, the response contradicts every key fact in the context. None of the claims can be verified, and all presented details are factually incorrect.
 
 ```typescript filename="src/example-complete-hallucination.ts" showLineNumbers copy
-import { openai } from "@ai-sdk/openai";
 import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
 
-const scorer = createHallucinationScorer({ model: openai
+const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
   context: [
     "The Wright brothers made their first flight in 1903.",
     "The flight lasted 12 seconds.",