@mastra/mcp-docs-server 0.13.31 → 0.13.32-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +1 -0
  2. package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +11 -11
  3. package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +25 -25
  4. package/.docs/organized/changelogs/%40mastra%2Fastra.md +11 -11
  5. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +10 -10
  6. package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +10 -10
  7. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +15 -15
  8. package/.docs/organized/changelogs/%40mastra%2Fcloud.md +11 -11
  9. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +10 -10
  10. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +23 -23
  11. package/.docs/organized/changelogs/%40mastra%2Fcore.md +122 -122
  12. package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +10 -10
  13. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +20 -20
  14. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +19 -19
  15. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +19 -19
  16. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +19 -19
  17. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +31 -31
  18. package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +10 -10
  19. package/.docs/organized/changelogs/%40mastra%2Fevals.md +19 -19
  20. package/.docs/organized/changelogs/%40mastra%2Flance.md +10 -10
  21. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +23 -23
  22. package/.docs/organized/changelogs/%40mastra%2Floggers.md +10 -10
  23. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +16 -16
  24. package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +10 -10
  25. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +14 -14
  26. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +10 -10
  27. package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +10 -10
  28. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +11 -11
  29. package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +10 -10
  30. package/.docs/organized/changelogs/%40mastra%2Fpg.md +21 -21
  31. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +11 -11
  32. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +35 -35
  33. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +11 -11
  34. package/.docs/organized/changelogs/%40mastra%2Frag.md +10 -10
  35. package/.docs/organized/changelogs/%40mastra%2Freact.md +20 -0
  36. package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
  37. package/.docs/organized/changelogs/%40mastra%2Fserver.md +37 -37
  38. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +11 -11
  39. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +10 -10
  40. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
  41. package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +13 -13
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +10 -10
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +10 -10
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +10 -10
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +10 -10
  46. package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +9 -0
  47. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +19 -19
  48. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +10 -10
  49. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +11 -11
  50. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +10 -10
  51. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +10 -10
  52. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +10 -10
  53. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +10 -10
  54. package/.docs/organized/changelogs/create-mastra.md +11 -11
  55. package/.docs/organized/changelogs/mastra.md +26 -26
  56. package/.docs/organized/code-examples/agent.md +55 -1
  57. package/.docs/organized/code-examples/agui.md +2 -2
  58. package/.docs/organized/code-examples/ai-elements.md +2 -2
  59. package/.docs/organized/code-examples/ai-sdk-useChat.md +2 -2
  60. package/.docs/organized/code-examples/ai-sdk-v5.md +2 -2
  61. package/.docs/organized/code-examples/assistant-ui.md +2 -2
  62. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +2 -2
  63. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +2 -2
  64. package/.docs/organized/code-examples/client-side-tools.md +2 -2
  65. package/.docs/organized/code-examples/crypto-chatbot.md +2 -2
  66. package/.docs/organized/code-examples/heads-up-game.md +2 -2
  67. package/.docs/organized/code-examples/openapi-spec-writer.md +2 -2
  68. package/.docs/raw/agents/agent-memory.mdx +48 -31
  69. package/.docs/raw/agents/guardrails.mdx +8 -1
  70. package/.docs/raw/agents/networks.mdx +197 -128
  71. package/.docs/raw/agents/overview.mdx +10 -9
  72. package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +92 -1
  73. package/.docs/raw/getting-started/installation.mdx +61 -68
  74. package/.docs/raw/memory/conversation-history.mdx +2 -2
  75. package/.docs/raw/memory/semantic-recall.mdx +36 -10
  76. package/.docs/raw/observability/ai-tracing/overview.mdx +220 -0
  77. package/.docs/raw/rag/chunking-and-embedding.mdx +19 -7
  78. package/.docs/raw/reference/cli/create-mastra.mdx +1 -1
  79. package/.docs/raw/reference/cli/mastra.mdx +1 -1
  80. package/.docs/raw/reference/client-js/agents.mdx +44 -25
  81. package/.docs/raw/reference/scorers/answer-relevancy.mdx +3 -6
  82. package/.docs/raw/reference/scorers/answer-similarity.mdx +7 -13
  83. package/.docs/raw/reference/scorers/bias.mdx +3 -6
  84. package/.docs/raw/reference/scorers/completeness.mdx +3 -6
  85. package/.docs/raw/reference/scorers/context-precision.mdx +6 -9
  86. package/.docs/raw/reference/scorers/context-relevance.mdx +12 -18
  87. package/.docs/raw/reference/scorers/faithfulness.mdx +3 -6
  88. package/.docs/raw/reference/scorers/hallucination.mdx +3 -6
  89. package/.docs/raw/reference/scorers/noise-sensitivity.mdx +13 -23
  90. package/.docs/raw/reference/scorers/prompt-alignment.mdx +16 -20
  91. package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +4 -5
  92. package/.docs/raw/reference/scorers/toxicity.mdx +3 -6
  93. package/.docs/raw/reference/workflows/step.mdx +1 -1
  94. package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +23 -2
  95. package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +22 -4
  96. package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +14 -4
  97. package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +18 -1
  98. package/.docs/raw/server-db/runtime-context.mdx +13 -3
  99. package/.docs/raw/streaming/tool-streaming.mdx +30 -0
  100. package/.docs/raw/tools-mcp/overview.mdx +1 -1
  101. package/.docs/raw/workflows/overview.mdx +1 -1
  102. package/.docs/raw/workflows/suspend-and-resume.mdx +34 -23
  103. package/CHANGELOG.md +15 -0
  104. package/package.json +5 -5
  105. package/.docs/raw/workflows/pausing-execution.mdx +0 -142
@@ -73,28 +73,40 @@ We go deeper into chunking strategies in our [chunk documentation](/reference/ra
73
73
 
74
74
  ## Step 2: Embedding Generation
75
75
 
76
- Transform chunks into embeddings using your preferred provider. Mastra supports many embedding providers, including OpenAI and Cohere:
76
+ Transform chunks into embeddings using your preferred provider. Mastra supports embedding models through the model router or AI SDK packages.
77
77
 
78
- ### Using OpenAI
78
+ ### Using the Model Router (Recommended)
79
+
80
+ The simplest way is to use Mastra's model router with `provider/model` strings:
79
81
 
80
82
  ```ts showLineNumbers copy
81
- import { openai } from "@ai-sdk/openai";
83
+ import { ModelRouterEmbeddingModel } from "@mastra/core";
82
84
  import { embedMany } from "ai";
83
85
 
86
+ const embeddingModel = new ModelRouterEmbeddingModel("openai/text-embedding-3-small");
87
+
84
88
  const { embeddings } = await embedMany({
85
- model: openai.embedding("text-embedding-3-small"),
89
+ model: embeddingModel,
86
90
  values: chunks.map((chunk) => chunk.text),
87
91
  });
88
92
  ```
89
93
 
90
- ### Using Cohere
94
+ Supported embedding models:
95
+ - **OpenAI**: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`
96
+ - **Google**: `gemini-embedding-001`, `text-embedding-004`
97
+
98
+ The model router automatically handles API key detection from environment variables.
99
+
100
+ ### Using AI SDK Packages
101
+
102
+ You can also use AI SDK embedding models directly:
91
103
 
92
104
  ```ts showLineNumbers copy
93
- import { cohere } from "@ai-sdk/cohere";
105
+ import { openai } from "@ai-sdk/openai";
94
106
  import { embedMany } from "ai";
95
107
 
96
108
  const { embeddings } = await embedMany({
97
- model: cohere.embedding("embed-english-v3.0"),
109
+ model: openai.embedding("text-embedding-3-small"),
98
110
  values: chunks.map((chunk) => chunk.text),
99
111
  });
100
112
  ```
@@ -96,7 +96,7 @@ Instead of an interactive prompt you can also define these CLI flags.
96
96
  name: "--components",
97
97
  type: "string",
98
98
  description:
99
- "Comma-separated list of components (agents, tools, workflows)",
99
+ "Comma-separated list of components (agents, tools, workflows, scorers)",
100
100
  isOptional: true,
101
101
  },
102
102
  {
@@ -173,7 +173,7 @@ The directory where Mastra files should be saved to. Defaults to `src`.
173
173
 
174
174
  #### `--components`
175
175
 
176
- Comma-separated list of components to add. For each component a new folder will be created. Defaults to `['agents', 'tools', 'workflows']`.
176
+ Comma-separated list of components to add. For each component, a new folder will be created. Choose from: `"agents" | "tools" | "workflows" | "scorers"`. Defaults to `['agents', 'tools', 'workflows']`.
177
177
 
178
178
  #### `--llm`
179
179
 
@@ -67,27 +67,11 @@ const response = await agent.stream({
67
67
 
68
68
  // Process data stream with the processDataStream util
69
69
  response.processDataStream({
70
- onTextPart: (text) => {
71
- process.stdout.write(text);
72
- },
73
- onFilePart: (file) => {
74
- console.log(file);
75
- },
76
- onDataPart: (data) => {
77
- console.log(data);
78
- },
79
- onErrorPart: (error) => {
80
- console.error(error);
70
+ onChunk: async(chunk) => {
71
+ console.log(chunk);
81
72
  },
82
73
  });
83
74
 
84
- // Process text stream with the processTextStream util
85
- // (used with structured output)
86
- response.processTextStream({
87
- onTextPart: text => {
88
- process.stdout.write(text);
89
- },
90
- });
91
75
 
92
76
  // You can also read from response body directly
93
77
  const reader = response.body.getReader();
@@ -134,8 +118,13 @@ const response = await agent.stream({
134
118
  });
135
119
 
136
120
  response.processDataStream({
137
- onTextPart: (text) => console.log(text),
138
- onToolCallPart: (toolCall) => console.log('Tool called:', toolCall.toolName),
121
+ onChunk: async (chunk) => {
122
+ if (chunk.type === 'text-delta') {
123
+ console.log(chunk.payload.text);
124
+ } else if (chunk.type === 'tool-call') {
125
+ console.log(`calling tool ${chunk.payload.toolName} with args ${JSON.stringify(chunk.payload.args, null, 2)}`);
126
+ }
127
+ },
139
128
  });
140
129
  ```
141
130
 
@@ -176,15 +165,45 @@ const response = await agent.stream(
176
165
 
177
166
  // Process the stream
178
167
  response.processDataStream({
179
- onChunk: (chunk) => {
180
- console.log(chunk);
168
+ onChunk: async (chunk) => {
169
+ if (chunk.type === 'text-delta') {
170
+ console.log(chunk.payload.text);
171
+ }
181
172
  },
182
173
  });
183
174
  ```
184
175
 
185
- Currently, AI SDK V5 format is not supported in the client SDK.
186
- For AI SDK v5 compatible format, leverage the `@mastra/ai-sdk` package
187
- [AI SDK v5 Stream Compatibility](/docs/frameworks/agentic-uis/ai-sdk#enabling-stream-compatibility)
176
+ #### AI SDK compatible format
177
+
178
+ To stream AI SDK-formatted parts on the client from an `agent.stream(...)` response, wrap `response.processDataStream` in a `ReadableStream<ChunkType>` and use `toAISdkFormat`:
179
+
180
+ ```typescript filename="client-ai-sdk-transform.ts" copy
181
+ import { createUIMessageStream } from 'ai';
182
+ import { toAISdkFormat } from '@mastra/ai-sdk';
183
+ import type { ChunkType, MastraModelOutput } from '@mastra/core/stream';
184
+
185
+ const response = await agent.stream({ messages: 'Tell me a story' });
186
+
187
+ const chunkStream: ReadableStream<ChunkType> = new ReadableStream<ChunkType>({
188
+ start(controller) {
189
+ response.processDataStream({
190
+ onChunk: async (chunk) => controller.enqueue(chunk as ChunkType),
191
+ }).finally(() => controller.close());
192
+ },
193
+ });
194
+
195
+ const uiMessageStream = createUIMessageStream({
196
+ execute: async ({ writer }) => {
197
+ for await (const part of toAISdkFormat(chunkStream as unknown as MastraModelOutput, { from: 'agent' })) {
198
+ writer.write(part);
199
+ }
200
+ },
201
+ });
202
+
203
+ for await (const part of uiMessageStream) {
204
+ console.log(part);
205
+ }
206
+ ```
188
207
 
189
208
  ### Generate
190
209
 
@@ -116,10 +116,9 @@ A relevancy score between 0 and 1:
116
116
  In this example, the response accurately addresses the input query with specific and relevant information.
117
117
 
118
118
  ```typescript filename="src/example-high-answer-relevancy.ts" showLineNumbers copy
119
- import { openai } from "@ai-sdk/openai";
120
119
  import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
121
120
 
122
- const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
121
+ const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
123
122
 
124
123
  const inputMessages = [{ role: 'user', content: "What are the health benefits of regular exercise?" }];
125
124
  const outputMessage = { text: "Regular exercise improves cardiovascular health, strengthens muscles, boosts metabolism, and enhances mental well-being through the release of endorphins." };
@@ -148,10 +147,9 @@ The output receives a high score because it accurately answers the query without
148
147
  In this example, the response addresses the query in part but includes additional information that isn’t directly relevant.
149
148
 
150
149
  ```typescript filename="src/example-partial-answer-relevancy.ts" showLineNumbers copy
151
- import { openai } from "@ai-sdk/openai";
152
150
  import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
153
151
 
154
- const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
152
+ const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
155
153
 
156
154
  const inputMessages = [{ role: 'user', content: "What should a healthy breakfast include?" }];
157
155
  const outputMessage = { text: "A nutritious breakfast should include whole grains and protein. However, the timing of your breakfast is just as important - studies show eating within 2 hours of waking optimizes metabolism and energy levels throughout the day." };
@@ -180,10 +178,9 @@ The output receives a lower score because it partially answers the query. While
180
178
  In this example, the response does not address the query and contains information that is entirely unrelated.
181
179
 
182
180
  ```typescript filename="src/example-low-answer-relevancy.ts" showLineNumbers copy
183
- import { openai } from "@ai-sdk/openai";
184
181
  import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
185
182
 
186
- const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
183
+ const scorer = createAnswerRelevancyScorer({ model: 'openai/gpt-4o-mini' });
187
184
 
188
185
  const inputMessages = [{ role: 'user', content: "What are the benefits of meditation?" }];
189
186
  const outputMessage = { text: "The Great Wall of China is over 13,000 miles long and was built during the Ming Dynasty to protect against invasions." };
@@ -175,12 +175,11 @@ await runExperiment({
175
175
  In this example, the agent's output semantically matches the ground truth perfectly.
176
176
 
177
177
  ```typescript filename="src/example-perfect-similarity.ts" showLineNumbers copy
178
- import { openai } from "@ai-sdk/openai";
179
178
  import { runExperiment } from "@mastra/core/scores";
180
179
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
181
180
  import { myAgent } from "./agent";
182
181
 
183
- const scorer = createAnswerSimilarityScorer({ model: openai("gpt-4o-mini") });
182
+ const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
184
183
 
185
184
  const result = await runExperiment({
186
185
  data: [
@@ -214,12 +213,11 @@ The output receives a perfect score because both the agent's answer and ground t
214
213
  In this example, the agent provides the same information as the ground truth but with different phrasing.
215
214
 
216
215
  ```typescript filename="src/example-semantic-similarity.ts" showLineNumbers copy
217
- import { openai } from "@ai-sdk/openai";
218
216
  import { runExperiment } from "@mastra/core/scores";
219
217
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
220
218
  import { myAgent } from "./agent";
221
219
 
222
- const scorer = createAnswerSimilarityScorer({ model: openai("gpt-4o-mini") });
220
+ const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
223
221
 
224
222
  const result = await runExperiment({
225
223
  data: [
@@ -253,12 +251,11 @@ The output receives a high score because it conveys the same information with eq
253
251
  In this example, the agent's response is partially correct but missing key information.
254
252
 
255
253
  ```typescript filename="src/example-partial-similarity.ts" showLineNumbers copy
256
- import { openai } from "@ai-sdk/openai";
257
254
  import { runExperiment } from "@mastra/core/scores";
258
255
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
259
256
  import { myAgent } from "./agent";
260
257
 
261
- const scorer = createAnswerSimilarityScorer({ model: openai("gpt-4o-mini") });
258
+ const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
262
259
 
263
260
  const result = await runExperiment({
264
261
  data: [
@@ -292,12 +289,11 @@ The output receives a moderate score because it includes some correct informatio
292
289
  In this example, the agent provides factually incorrect information that contradicts the ground truth.
293
290
 
294
291
  ```typescript filename="src/example-contradiction.ts" showLineNumbers copy
295
- import { openai } from "@ai-sdk/openai";
296
292
  import { runExperiment } from "@mastra/core/scores";
297
293
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
298
294
  import { myAgent } from "./agent";
299
295
 
300
- const scorer = createAnswerSimilarityScorer({ model: openai("gpt-4o-mini") });
296
+ const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
301
297
 
302
298
  const result = await runExperiment({
303
299
  data: [
@@ -332,13 +328,12 @@ Use the scorer in your test suites to ensure agent consistency over time:
332
328
 
333
329
  ```typescript filename="src/ci-integration.test.ts" showLineNumbers copy
334
330
  import { describe, it, expect } from 'vitest';
335
- import { openai } from "@ai-sdk/openai";
336
331
  import { runExperiment } from "@mastra/core/scores";
337
332
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
338
333
  import { myAgent } from "./agent";
339
334
 
340
335
  describe('Agent Consistency Tests', () => {
341
- const scorer = createAnswerSimilarityScorer({ model: openai("gpt-4o-mini") });
336
+ const scorer = createAnswerSimilarityScorer({ model: 'openai/gpt-4o-mini' });
342
337
 
343
338
  it('should provide accurate factual answers', async () => {
344
339
  const result = await runExperiment({
@@ -386,14 +381,13 @@ describe('Agent Consistency Tests', () => {
386
381
  Customize the scorer behavior for specific use cases:
387
382
 
388
383
  ```typescript filename="src/custom-config.ts" showLineNumbers copy
389
- import { openai } from "@ai-sdk/openai";
390
384
  import { runExperiment } from "@mastra/core/scores";
391
385
  import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
392
386
  import { myAgent } from "./agent";
393
387
 
394
388
  // Configure for strict exact matching with high scale
395
389
  const strictScorer = createAnswerSimilarityScorer({
396
- model: openai("gpt-4o-mini"),
390
+ model: 'openai/gpt-4o-mini',
397
391
  options: {
398
392
  exactMatchBonus: 0.5, // Higher bonus for exact matches
399
393
  contradictionPenalty: 2.0, // Very strict on contradictions
@@ -404,7 +398,7 @@ const strictScorer = createAnswerSimilarityScorer({
404
398
 
405
399
  // Configure for lenient semantic matching
406
400
  const lenientScorer = createAnswerSimilarityScorer({
407
- model: openai("gpt-4o-mini"),
401
+ model: 'openai/gpt-4o-mini',
408
402
  options: {
409
403
  semanticThreshold: 0.6, // Lower threshold for semantic matches
410
404
  contradictionPenalty: 0.5, // More forgiving on minor contradictions
@@ -127,10 +127,9 @@ A bias score between 0 and 1:
127
127
  In this example, the response contains overtly biased language and generalizations. The assumptions are explicit and strongly reinforce stereotypes or discriminatory views.
128
128
 
129
129
  ```typescript filename="src/example-high-bias.ts" showLineNumbers copy
130
- import { openai } from "@ai-sdk/openai";
131
130
  import { createBiasScorer } from "@mastra/evals/scorers/llm";
132
131
 
133
- const scorer = createBiasScorer({ model: openai("gpt-4o-mini") });
132
+ const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
134
133
 
135
134
  const query = "What makes someone a good leader?";
136
135
  const response =
@@ -160,10 +159,9 @@ The output receives a high score because it's a clear example of discriminatory
160
159
  In this example, the response contains a mix of reasonable points with subtle bias. While parts of the response are fair, underlying bias is still present.
161
160
 
162
161
  ```typescript filename="src/example-mixed-bias.ts" showLineNumbers copy
163
- import { openai } from "@ai-sdk/openai";
164
162
  import { createBiasScorer } from "@mastra/evals/scorers/llm";
165
163
 
166
- const scorer = createBiasScorer({ model: openai("gpt-4o-mini") });
164
+ const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
167
165
 
168
166
  const query = "How do different age groups perform at work?";
169
167
  const response =
@@ -193,10 +191,9 @@ The output receives a lower score because the response introduces bias in a more
193
191
  In this example, the response focuses on objective and neutral criteria without introducing biased assumptions.
194
192
 
195
193
  ```typescript filename="src/example-low-bias.ts" showLineNumbers copy
196
- import { openai } from "@ai-sdk/openai";
197
194
  import { createBiasScorer } from "@mastra/evals/scorers/llm";
198
195
 
199
- const scorer = createBiasScorer({ model: openai("gpt-4o-mini") });
196
+ const scorer = createBiasScorer({ model: 'openai/gpt-4o-mini' });
200
197
 
201
198
  const query = "What is the best hiring practice?";
202
199
  const response =
@@ -110,10 +110,9 @@ A completeness score between 0 and 1:
110
110
  In this example, the response comprehensively addresses all aspects of the query with detailed information covering multiple dimensions.
111
111
 
112
112
  ```typescript filename="src/example-high-completeness.ts" showLineNumbers copy
113
- import { openai } from "@ai-sdk/openai";
114
113
  import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
115
114
 
116
- const scorer = createCompletenessScorer({ model: openai("gpt-4o-mini") });
115
+ const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
117
116
 
118
117
  const query = "Explain the process of photosynthesis, including the inputs, outputs, and stages involved.";
119
118
  const response =
@@ -143,10 +142,9 @@ The output receives a high score because it addresses all requested aspects: inp
143
142
  In this example, the response addresses some key points but misses important aspects or lacks sufficient detail.
144
143
 
145
144
  ```typescript filename="src/example-partial-completeness.ts" showLineNumbers copy
146
- import { openai } from "@ai-sdk/openai";
147
145
  import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
148
146
 
149
- const scorer = createCompletenessScorer({ model: openai("gpt-4o-mini") });
147
+ const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
150
148
 
151
149
  const query = "What are the benefits and drawbacks of remote work for both employees and employers?";
152
150
  const response =
@@ -176,10 +174,9 @@ The output receives a moderate score because it covers employee benefits and som
176
174
  In this example, the response only partially addresses the query and misses several important aspects.
177
175
 
178
176
  ```typescript filename="src/example-low-completeness.ts" showLineNumbers copy
179
- import { openai } from "@ai-sdk/openai";
180
177
  import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
181
178
 
182
- const scorer = createCompletenessScorer({ model: openai("gpt-4o-mini") });
179
+ const scorer = createCompletenessScorer({ model: 'openai/gpt-4o-mini' });
183
180
 
184
181
  const query = "Compare renewable and non-renewable energy sources in terms of cost, environmental impact, and sustainability.";
185
182
  const response =
@@ -31,7 +31,7 @@ Use when optimizing context selection for:
31
31
  content={[
32
32
  {
33
33
  name: "model",
34
- type: "MastraLanguageModel",
34
+ type: "MastraModelConfig",
35
35
  description: "The language model to use for evaluating context relevance",
36
36
  required: true,
37
37
  },
@@ -146,7 +146,7 @@ MAP = (1.0 + 0.67) / 2 = 0.835 ≈ **0.83**
146
146
 
147
147
  ```typescript
148
148
  const scorer = createContextPrecisionScorer({
149
- model: openai('gpt-4o-mini'),
149
+ model: 'openai/gpt-4o-mini',
150
150
  options: {
151
151
  contextExtractor: (input, output) => {
152
152
  // Extract context dynamically based on the query
@@ -165,7 +165,7 @@ const scorer = createContextPrecisionScorer({
165
165
 
166
166
  ```typescript
167
167
  const scorer = createContextPrecisionScorer({
168
- model: openai('gpt-4o-mini'),
168
+ model: 'openai/gpt-4o-mini',
169
169
  options: {
170
170
  context: [
171
171
  // Simulate retrieved documents from vector database
@@ -187,11 +187,10 @@ const scorer = createContextPrecisionScorer({
187
187
  This example shows perfect context precision where all relevant context appears early:
188
188
 
189
189
  ```typescript
190
- import { openai } from '@ai-sdk/openai';
191
190
  import { createContextPrecisionScorer } from '@mastra/evals';
192
191
 
193
192
  const scorer = createContextPrecisionScorer({
194
- model: openai('gpt-4o-mini'),
193
+ model: 'openai/gpt-4o-mini',
195
194
  options: {
196
195
  context: [
197
196
  'Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen.',
@@ -234,11 +233,10 @@ console.log(result);
234
233
  This example shows moderate precision with both relevant and irrelevant context:
235
234
 
236
235
  ```typescript
237
- import { openai } from '@ai-sdk/openai';
238
236
  import { createContextPrecisionScorer } from '@mastra/evals';
239
237
 
240
238
  const scorer = createContextPrecisionScorer({
241
- model: openai('gpt-4o-mini'),
239
+ model: 'openai/gpt-4o-mini',
242
240
  options: {
243
241
  context: [
244
242
  'Regular exercise improves cardiovascular health by strengthening the heart muscle.',
@@ -283,11 +281,10 @@ console.log(result);
283
281
  This example shows poor context precision with mostly irrelevant context:
284
282
 
285
283
  ```typescript
286
- import { openai } from '@ai-sdk/openai';
287
284
  import { createContextPrecisionScorer } from '@mastra/evals';
288
285
 
289
286
  const scorer = createContextPrecisionScorer({
290
- model: openai('gpt-4o-mini'),
287
+ model: 'openai/gpt-4o-mini',
291
288
  options: {
292
289
  context: [
293
290
  'The weather forecast shows sunny skies this weekend.',
@@ -31,7 +31,7 @@ Use when optimizing for:
31
31
  content={[
32
32
  {
33
33
  name: "model",
34
- type: "MastraLanguageModel",
34
+ type: "MastraModelConfig",
35
35
  description: "The language model to use for evaluating context relevance",
36
36
  required: true,
37
37
  },
@@ -185,12 +185,11 @@ Use results to improve your system:
185
185
  Control how penalties are applied for unused and missing context:
186
186
 
187
187
  ```typescript
188
- import { openai } from '@ai-sdk/openai';
189
188
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
190
189
 
191
190
  // Stricter penalty configuration
192
191
  const strictScorer = createContextRelevanceScorerLLM({
193
- model: openai('gpt-4o-mini'),
192
+ model: 'openai/gpt-4o-mini',
194
193
  options: {
195
194
  context: [
196
195
  'Einstein won the Nobel Prize for photoelectric effect',
@@ -208,7 +207,7 @@ const strictScorer = createContextRelevanceScorerLLM({
208
207
 
209
208
  // Lenient penalty configuration
210
209
  const lenientScorer = createContextRelevanceScorerLLM({
211
- model: openai('gpt-4o-mini'),
210
+ model: 'openai/gpt-4o-mini',
212
211
  options: {
213
212
  context: [
214
213
  'Einstein won the Nobel Prize for photoelectric effect',
@@ -254,7 +253,7 @@ console.log('Lenient penalties:', lenientResult.score); // Higher score, less pe
254
253
 
255
254
  ```typescript
256
255
  const scorer = createContextRelevanceScorerLLM({
257
- model: openai('gpt-4o'),
256
+ model: 'openai/gpt-4o',
258
257
  options: {
259
258
  contextExtractor: (input, output) => {
260
259
  // Extract context based on the query
@@ -278,7 +277,7 @@ const scorer = createContextRelevanceScorerLLM({
278
277
 
279
278
  ```typescript
280
279
  const scorer = createContextRelevanceScorerLLM({
281
- model: openai('gpt-4o-mini'),
280
+ model: 'openai/gpt-4o-mini',
282
281
  options: {
283
282
  context: [
284
283
  'Relevant information...',
@@ -295,7 +294,7 @@ const scorer = createContextRelevanceScorerLLM({
295
294
 
296
295
  ```typescript
297
296
  const scorer = createContextRelevanceScorerLLM({
298
- model: openai('gpt-4o-mini'),
297
+ model: 'openai/gpt-4o-mini',
299
298
  options: {
300
299
  contextExtractor: (input, output) => {
301
300
  const query = input?.inputMessages?.[0]?.content || '';
@@ -323,11 +322,10 @@ const scorer = createContextRelevanceScorerLLM({
323
322
  This example shows excellent context relevance where all context directly supports the response:
324
323
 
325
324
  ```typescript
326
- import { openai } from '@ai-sdk/openai';
327
325
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
328
326
 
329
327
  const scorer = createContextRelevanceScorerLLM({
330
- model: openai('gpt-4o-mini'),
328
+ model: 'openai/gpt-4o-mini',
331
329
  options: {
332
330
  context: [
333
331
  'Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1921.',
@@ -370,11 +368,10 @@ console.log(result);
370
368
  This example shows moderate relevance with some context being irrelevant or unused:
371
369
 
372
370
  ```typescript
373
- import { openai } from '@ai-sdk/openai';
374
371
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
375
372
 
376
373
  const scorer = createContextRelevanceScorerLLM({
377
- model: openai('gpt-4o-mini'),
374
+ model: 'openai/gpt-4o-mini',
378
375
  options: {
379
376
  context: [
380
377
  'Solar eclipses occur when the Moon blocks the Sun.',
@@ -415,7 +412,7 @@ console.log(result);
415
412
 
416
413
  // With custom penalty configuration
417
414
  const customScorer = createContextRelevanceScorerLLM({
418
- model: openai('gpt-4o-mini'),
415
+ model: 'openai/gpt-4o-mini',
419
416
  options: {
420
417
  context: [
421
418
  'Solar eclipses occur when the Moon blocks the Sun.',
@@ -450,11 +447,10 @@ console.log(customResult);
450
447
  This example shows poor context relevance with mostly irrelevant information:
451
448
 
452
449
  ```typescript
453
- import { openai } from '@ai-sdk/openai';
454
450
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
455
451
 
456
452
  const scorer = createContextRelevanceScorerLLM({
457
- model: openai('gpt-4o-mini'),
453
+ model: 'openai/gpt-4o-mini',
458
454
  options: {
459
455
  context: [
460
456
  'The Great Barrier Reef is located in Australia.',
@@ -499,11 +495,10 @@ console.log(result);
499
495
  Extract context dynamically based on the run input:
500
496
 
501
497
  ```typescript
502
- import { openai } from '@ai-sdk/openai';
503
498
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
504
499
 
505
500
  const scorer = createContextRelevanceScorerLLM({
506
- model: openai('gpt-4o-mini'),
501
+ model: 'openai/gpt-4o-mini',
507
502
  options: {
508
503
  contextExtractor: (input, output) => {
509
504
  // Extract query from input
@@ -543,11 +538,10 @@ const scorer = createContextRelevanceScorerLLM({
543
538
  Integrate with RAG pipelines to evaluate retrieved context:
544
539
 
545
540
  ```typescript
546
- import { openai } from '@ai-sdk/openai';
547
541
  import { createContextRelevanceScorerLLM } from '@mastra/evals';
548
542
 
549
543
  const scorer = createContextRelevanceScorerLLM({
550
- model: openai('gpt-4o-mini'),
544
+ model: 'openai/gpt-4o-mini',
551
545
  options: {
552
546
  contextExtractor: (input, output) => {
553
547
  // Extract from RAG retrieval results
@@ -121,10 +121,9 @@ A faithfulness score between 0 and 1:
121
121
  In this example, the response closely aligns with the context. Each statement in the output is verifiable and supported by the provided context entries, resulting in a high score.
122
122
 
123
123
  ```typescript filename="src/example-high-faithfulness.ts" showLineNumbers copy
124
- import { openai } from "@ai-sdk/openai";
125
124
  import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
126
125
 
127
- const scorer = createFaithfulnessScorer({ model: openai("gpt-4o-mini"), options: {
126
+ const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
128
127
  context: [
129
128
  "The Tesla Model 3 was launched in 2017.",
130
129
  "It has a range of up to 358 miles.",
@@ -159,10 +158,9 @@ The output receives a score of 1 because all the information it provides can be
159
158
  In this example, there is a mix of supported and unsupported claims. Some parts of the response are backed by the context, while others introduce new information not found in the source material.
160
159
 
161
160
  ```typescript filename="src/example-mixed-faithfulness.ts" showLineNumbers copy
162
- import { openai } from "@ai-sdk/openai";
163
161
  import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
164
162
 
165
- const scorer = createFaithfulnessScorer({ model: openai("gpt-4o-mini"), options: {
163
+ const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
166
164
  context: [
167
165
  "Python was created by Guido van Rossum.",
168
166
  "The first version was released in 1991.",
@@ -197,10 +195,9 @@ The score is lower because only a portion of the response is verifiable. While s
197
195
  In this example, the response directly contradicts the context. None of the claims are supported, and several conflict with the facts provided.
198
196
 
199
197
  ```typescript filename="src/example-low-faithfulness.ts" showLineNumbers copy
200
- import { openai } from "@ai-sdk/openai";
201
198
  import { createFaithfulnessScorer } from "@mastra/evals/scorers/llm";
202
199
 
203
- const scorer = createFaithfulnessScorer({ model: openai("gpt-4o-mini"), options: {
200
+ const scorer = createFaithfulnessScorer({ model: 'openai/gpt-4o-mini', options: {
204
201
  context: [
205
202
  "Mars is the fourth planet from the Sun.",
206
203
  "It has a thin atmosphere of mostly carbon dioxide.",
@@ -132,10 +132,9 @@ A hallucination score between 0 and 1:
132
132
  In this example, the response is fully aligned with the provided context. All claims are factually correct and directly supported by the source material, resulting in a low hallucination score.
133
133
 
134
134
  ```typescript filename="src/example-no-hallucination.ts" showLineNumbers copy
135
- import { openai } from "@ai-sdk/openai";
136
135
  import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
137
136
 
138
- const scorer = createHallucinationScorer({ model: openai("gpt-4o-mini"), options: {
137
+ const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
139
138
  context: [
140
139
  "The iPhone was first released in 2007.",
141
140
  "Steve Jobs unveiled it at Macworld.",
@@ -170,10 +169,9 @@ The response receives a score of 0 because there are no contradictions. Every st
170
169
  In this example, the response includes both accurate and inaccurate claims. Some details align with the context, while others directly contradict it—such as inflated numbers or incorrect locations. These contradictions increase the hallucination score.
171
170
 
172
171
  ```typescript filename="src/example-mixed-hallucination.ts" showLineNumbers copy
173
- import { openai } from "@ai-sdk/openai";
174
172
  import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
175
173
 
176
- const scorer = createHallucinationScorer({ model: openai("gpt-4o-mini"), options: {
174
+ const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
177
175
  context: [
178
176
  "The first Star Wars movie was released in 1977.",
179
177
  "It was directed by George Lucas.",
@@ -209,10 +207,9 @@ The Scorer assigns a mid-range score because parts of the response conflict with
209
207
  In this example, the response contradicts every key fact in the context. None of the claims can be verified, and all presented details are factually incorrect.
210
208
 
211
209
  ```typescript filename="src/example-complete-hallucination.ts" showLineNumbers copy
212
- import { openai } from "@ai-sdk/openai";
213
210
  import { createHallucinationScorer } from "@mastra/evals/scorers/llm";
214
211
 
215
- const scorer = createHallucinationScorer({ model: openai("gpt-4o-mini"), options: {
212
+ const scorer = createHallucinationScorer({ model: 'openai/gpt-4o-mini', options: {
216
213
  context: [
217
214
  "The Wright brothers made their first flight in 1903.",
218
215
  "The flight lasted 12 seconds.",