@mastra/mcp-docs-server 0.13.30-alpha.0 → 0.13.30-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +15 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +22 -22
- package/.docs/organized/changelogs/%40mastra%2Freact.md +13 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +8 -8
- package/.docs/organized/changelogs/create-mastra.md +19 -19
- package/.docs/organized/changelogs/mastra.md +27 -27
- package/.docs/organized/code-examples/agent.md +0 -1
- package/.docs/organized/code-examples/agui.md +2 -2
- package/.docs/organized/code-examples/client-side-tools.md +2 -2
- package/.docs/raw/agents/adding-voice.mdx +118 -25
- package/.docs/raw/agents/agent-memory.mdx +73 -89
- package/.docs/raw/agents/guardrails.mdx +1 -1
- package/.docs/raw/agents/overview.mdx +39 -7
- package/.docs/raw/agents/using-tools.mdx +95 -0
- package/.docs/raw/deployment/overview.mdx +9 -11
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +1 -1
- package/.docs/raw/frameworks/servers/express.mdx +2 -2
- package/.docs/raw/getting-started/installation.mdx +34 -85
- package/.docs/raw/getting-started/mcp-docs-server.mdx +13 -1
- package/.docs/raw/index.mdx +49 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +3 -0
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +6 -0
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +105 -7
- package/.docs/raw/reference/scorers/answer-similarity.mdx +266 -16
- package/.docs/raw/reference/scorers/bias.mdx +107 -6
- package/.docs/raw/reference/scorers/completeness.mdx +131 -8
- package/.docs/raw/reference/scorers/content-similarity.mdx +107 -8
- package/.docs/raw/reference/scorers/context-precision.mdx +234 -18
- package/.docs/raw/reference/scorers/context-relevance.mdx +418 -35
- package/.docs/raw/reference/scorers/faithfulness.mdx +122 -8
- package/.docs/raw/reference/scorers/hallucination.mdx +125 -8
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +141 -9
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +478 -6
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +351 -102
- package/.docs/raw/reference/scorers/textual-difference.mdx +134 -6
- package/.docs/raw/reference/scorers/tone-consistency.mdx +133 -0
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +422 -65
- package/.docs/raw/reference/scorers/toxicity.mdx +125 -7
- package/.docs/raw/reference/workflows/workflow.mdx +33 -0
- package/.docs/raw/scorers/custom-scorers.mdx +244 -3
- package/.docs/raw/scorers/overview.mdx +8 -38
- package/.docs/raw/server-db/middleware.mdx +5 -2
- package/.docs/raw/server-db/runtime-context.mdx +178 -0
- package/.docs/raw/streaming/workflow-streaming.mdx +5 -1
- package/.docs/raw/tools-mcp/overview.mdx +25 -7
- package/.docs/raw/workflows/overview.mdx +28 -1
- package/CHANGELOG.md +7 -0
- package/package.json +4 -4
- package/.docs/raw/agents/runtime-context.mdx +0 -106
- package/.docs/raw/agents/using-tools-and-mcp.mdx +0 -241
- package/.docs/raw/getting-started/model-providers.mdx +0 -63
- package/.docs/raw/tools-mcp/runtime-context.mdx +0 -63
- /package/.docs/raw/{evals → scorers/evals-old-api}/custom-eval.mdx +0 -0
- /package/.docs/raw/{evals → scorers/evals-old-api}/overview.mdx +0 -0
- /package/.docs/raw/{evals → scorers/evals-old-api}/running-in-ci.mdx +0 -0
- /package/.docs/raw/{evals → scorers/evals-old-api}/textual-evals.mdx +0 -0
- /package/.docs/raw/{server-db → workflows}/snapshots.mdx +0 -0
package/.docs/raw/getting-started/installation.mdx
CHANGED

@@ -5,27 +5,24 @@ description: Guide on installing Mastra and setting up the necessary prerequisit

 import { Callout, Steps } from "nextra/components";
 import { Tabs, Tab } from "@/components/tabs";
+import { VideoPlayer } from "@/components/video-player"

 # Install Mastra

-
+The `create mastra` CLI command is the quickest way to start a new Mastra project. It walks you through setup and creates example agents, workflows, and tools for you to learn from or adapt.

-
+For more control over setup, or to add Mastra to an existing project, see the [manual installation guide](#install-manually). You can also use [`mastra init`](/reference/cli/mastra#mastra-init) for existing projects.

-##
+## Before you start

--
--
+- You'll need an API key from a [model provider](/models) to complete setup. We recommend starting with [Gemini](https://aistudio.google.com/app/api-keys), as you likely already have a Google account and they don't require a card.
+- Node.js 20 or later.

-## Install
+## Install with `create mastra`

-
+You can run `create mastra` anywhere on your machine.

-
-
-## Start the CLI wizard
-
-Run the following command to start the interactive setup:
+The wizard will guide you through setup, create a new directory for your project, and generate a weather agent with example workflows and tools to get you started.

 {/*
 LLM CONTEXT: This Tabs component shows different package manager commands for creating a new Mastra project.
@@ -34,109 +31,61 @@ This helps users choose their preferred package manager while following the same
 All commands achieve the same result - creating a new Mastra project with the interactive setup.
 */}

-<Tabs items={["npm", "
+<Tabs items={["npm", "pnpm", "yarn", "bun"]}>
 <Tab>
 ```bash copy
-
+npm create mastra@latest -y
 ```
 </Tab>
 <Tab>
 ```bash copy
-
+pnpm create mastra@latest -y
 ```
 </Tab>
 <Tab>
 ```bash copy
-
+yarn create mastra@latest -y
 ```
 </Tab>
 <Tab>
 ```bash copy
-bun create mastra@latest
+bun create mastra@latest -y
 ```
 </Tab>
 </Tabs>

+<Callout type="default">
+You can use flags with `create mastra` like `--no-example` to skip the example weather agent or `--template` to start from a specific [template](/templates). Read the [CLI reference](/reference/cli/create-mastra) for all options.
+</Callout>

-**Install using CLI flags**
-
-You can also run the Mastra CLI in non-interactive mode by passing all required flags, for example:
-
-```bash copy
-npx create-mastra@latest --project-name hello-mastra --example --components tools,agents,workflows --llm openai
-```
-
-**Install with a template**
-
-Start with a pre-built template that demonstrates specific use cases:
-
-```bash copy
-npx create-mastra@latest --template template-name
-```
-
-> Browse available templates and learn more in [Templates](/docs/getting-started/templates).
-
-For example, to create a text-to-SQL application:
-
-```bash copy
-npx create-mastra@latest --template text-to-sql
-```

-
+### Test your agent

-
+Once setup is complete, follow the instructions in your terminal to start the Mastra dev server, then open the Playground at http://localhost:4111.

-
+Try asking about the weather. If your API key is set up correctly, you'll get a response:

-
-
-
-> This example uses OpenAI. Each LLM provider uses a unique name. See [Model Capabilities](/docs/getting-started/model-capability) for more information.
+<VideoPlayer
+src="https://res.cloudinary.com/mastra-assets/video/upload/v1751406022/local-dev-agents-playground_100_m3begx.mp4"
+/>

-
+<Callout type="default">
+If you encounter an error, your API key may not be configured correctly. Double-check your setup and try again. Need more help? [Join our Discord](https://discord.gg/BTYqqHKUrf) and talk to the team directly.
+</Callout>

-
+The [Playground](/docs/server-db/local-dev-playground) lets you rapidly build and prototype agents without needing to build a UI. Once you're ready, you can integrate your Mastra agent into your application using the guides below.

-{/*
-LLM CONTEXT: This Tabs component shows different package manager commands for starting Mastra's development server.
-Each tab displays the equivalent command for that specific package manager (npx, npm, yarn, pnpm, bun).
-This helps users choose their preferred package manager.
-All commands achieve the same result - starting Mastra's development server.
-*/}

-
-<Tab>
-```bash copy
-npm run dev
-```
-</Tab>
-<Tab>
-```bash copy
-yarn run dev
-```
-</Tab>
-<Tab>
-```bash copy
-pnpm run dev
-```
-</Tab>
-<Tab>
-```bash copy
-bun run dev
-```
-</Tab>
-<Tab>
-```bash copy
-mastra dev
-```
-</Tab>
-</Tabs>
+### Next steps

-
+- Read more about [Mastra's features](/docs#why-mastra).
+- Integrate Mastra with your frontend framework: [Next.js](/docs/frameworks/web-frameworks/next-js), [React](/docs/frameworks/web-frameworks/vite-react), or [Astro](/docs/frameworks/web-frameworks/astro).
+- Build an agent from scratch following one of our [guides](/guides).
+- Watch conceptual guides on our [YouTube channel](https://www.youtube.com/@mastra-ai) and [subscribe](https://www.youtube.com/@mastra-ai?sub_confirmation=1)!

 ## Install manually

-
+If you prefer not to use our automatic `create mastra` CLI tool, you can set up your project yourself by following the guide below.

 <Steps>

@@ -368,7 +317,7 @@ To install Mastra in an existing project, use the `mastra init` command.

 > See [mastra init](/reference/cli/init) for more information.

-
+### Next steps

 - [Local Development](/docs/server-db/local-dev-playground)
 - [Deploy to Mastra Cloud](/docs/deployment/overview)
package/.docs/raw/getting-started/mcp-docs-server.mdx
CHANGED

@@ -7,7 +7,7 @@ import YouTube from "@/components/youtube";

 # Mastra Docs Server

-The `@mastra/mcp-docs-server` package provides direct access to Mastra’s full knowledge base, including documentation, code examples, blog posts, and changelogs, via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/docs/getting-started/intro). It works with Cursor, Windsurf, Cline, Claude Code, or any tool that supports MCP.
+The `@mastra/mcp-docs-server` package provides direct access to Mastra’s full knowledge base, including documentation, code examples, blog posts, and changelogs, via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/docs/getting-started/intro). It works with Cursor, Windsurf, Cline, Claude Code, Codex or any tool that supports MCP.

 These tools are designed to help agents retrieve precise, task-specific information — whether you're adding a feature to an agent, scaffolding a new project, or exploring how something works.

@@ -47,6 +47,18 @@ claude mcp add mastra -- npx -y @mastra/mcp-docs-server

 [More info on using MCP servers with Claude Code](https://docs.claude.com/en/docs/claude-code/mcp)

+### OpenAI Codex CLI
+
+1. Register it from the terminal:
+
+```bash copy
+codex mcp add mastra-docs -- npx -y @mastra/mcp-docs-server
+```
+
+2. Run `codex mcp list` to confirm the server shows as `enabled`.
+
+[More info on using MCP servers with OpenAI Codex](https://developers.openai.com/codex/mcp)
+
 ### Cursor

 Install by clicking the button below:
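Beyond editor integrations, the same server can also be consumed from code. Below is a minimal sketch using `MCPClient` from `@mastra/mcp` to spawn `@mastra/mcp-docs-server` over stdio and list its tools; the `mastra` server key and the logging of tool names are illustrative assumptions, not part of the diffed docs.

```typescript
import { MCPClient } from "@mastra/mcp";

// Spawn the docs server over stdio, mirroring the `npx -y @mastra/mcp-docs-server`
// command used by the editor setups above. The "mastra" key is an arbitrary label.
const mcp = new MCPClient({
  servers: {
    mastra: {
      command: "npx",
      args: ["-y", "@mastra/mcp-docs-server"],
    },
  },
});

// List the documentation tools the server exposes, then shut the connection down.
const tools = await mcp.getTools();
console.log(Object.keys(tools));
await mcp.disconnect();
```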
package/.docs/raw/index.mdx
CHANGED

@@ -1,23 +1,58 @@
 ---
-title: "
-description: "Mastra is
+title: "About Mastra | Mastra Docs"
+description: "Mastra is an all-in-one framework for building AI-powered applications and agents with a modern TypeScript stack."
 ---

+import YouTube from "@/components/youtube";
+
 # About Mastra

-Mastra is
+From the team behind Gatsby, Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.
+
+It includes everything you need to go from early prototypes to production-ready applications. Mastra integrates with frontend and backend frameworks like React, Next.js, and Node, or you can deploy it anywhere as a standalone server. It's the easiest way to build, tune, and scale reliable AI products.
+
+<YouTube id="8o_Ejbcw5s8" />
+
+## Why Mastra?
+
+Purpose-built for TypeScript and designed around established AI patterns, Mastra gives you everything you need to build great AI applications out-of-the-box.
+
+Some highlights include:
+
+- [**Model routing**](/models) - Connect to 40+ providers through one standard interface. Use models from OpenAI, Anthropic, Gemini, and more.
+
+- [**Agents**](/docs/agents/overview) - Build autonomous agents that use LLMs and tools to solve open-ended tasks. Agents reason about goals, decide which tools to use, and iterate internally until the model emits a final answer or an optional stopping condition is met.
+
+- [**Workflows**](/docs/workflows/overview) - When you need explicit control over execution, use Mastra's graph-based workflow engine to orchestrate complex multi-step processes. Mastra workflows use an intuitive syntax for control flow (`.then()`, `.branch()`, `.parallel()`).
+
+- [**Human-in-the-loop**](/docs/workflows/suspend-and-resume) - Suspend an agent or workflow and await user input or approval before resuming. Mastra uses [storage](/docs/server-db/storage) to remember execution state, so you can pause indefinitely and resume where you left off.
+
+- **Context management** - Give your agents the right context at the right time. Provide [conversation history](/docs/memory/conversation-history), [retrieve](/docs/rag/overview) data from your sources (APIs, databases, files), and add human-like [working](/docs/memory/working-memory) and [semantic](/docs/memory/semantic-recall) memory so your agents behave coherently.
+
+- **Integrations** - Bundle agents and workflows into existing React, Next.js, or Node.js apps, or ship them as standalone endpoints. When building UIs, integrate with agentic libraries like Vercel's AI SDK UI and CopilotKit to bring your AI assistant to life on the web.
+
+- **Production essentials** - Shipping reliable agents takes ongoing insight, evaluation, and iteration. With built-in [evals](/docs/evals/overview) and [observability](/docs/observability/overview), Mastra gives you the tools to observe, measure, and refine continuously.
+
+
+## What can you build?
+
+- AI-powered applications that combine language understanding, reasoning, and action to solve real-world tasks.
+
+- Conversational agents for customer support, onboarding, or internal queries.
+
+- Domain-specific copilots for coding, legal, finance, research, or creative work.
+
+- Workflow automations that trigger, route, and complete multi-step processes.
+
+- Decision-support tools that analyse data and provide actionable recommendations.
+
+Explore real-world examples in our [case studies](/blog/category/case-studies) and [community showcase](/showcase).
+

-
+## Get started

-
+Follow the [Installation guide](/docs/getting-started/installation) for step-by-step setup with the CLI or a manual install.

-
+If you're new to AI agents, check out our [templates](/docs/getting-started/templates), [course](/course), and [YouTube videos](https://youtube.com/@mastra-ai) to start building with Mastra today.

-
-
-- **[Agent memory and tool calling](/docs/agents/agent-memory.mdx)**: With Mastra, you can give your agent tools (functions) that it can call. You can persist agent memory and retrieve it based on recency, semantic similarity, or conversation thread.
-- **[Workflow graphs](/docs/workflows/overview.mdx)**: When you want to execute LLM calls in a deterministic way, Mastra gives you a graph-based workflow engine. You can define discrete steps, log inputs and outputs at each step of each run, and pipe them into an observability tool. Mastra workflows have a simple syntax for control flow (`.then()`, `.branch()`, `.parallel()`) that allows branching and chaining.
-- **[Agent development playground](/docs/server-db/local-dev-playground.mdx)**: When you're developing an agent locally, you can chat with it and see its state and memory in Mastra's agent development environment.
-- **[Retrieval-augmented generation (RAG)](/docs/rag/overview.mdx)**: Mastra gives you APIs to process documents (text, HTML, Markdown, JSON) into chunks, create embeddings, and store them in a vector database. At query time, it retrieves relevant chunks to ground LLM responses in your data, with a unified API on top of multiple vector stores (Pinecone, pgvector, etc) and embedding providers (OpenAI, Cohere, etc).
-- **[Deployment](/docs/deployment/deployment.mdx)**: Mastra supports bundling your agents and workflows within an existing React, Next.js, or Node.js application, or into standalone endpoints. The Mastra deploy helper lets you easily bundle agents and workflows into a Node.js server using Hono, or deploy it onto a serverless platform like Vercel, Cloudflare Workers, or Netlify.
-- **[Evals](/docs/evals/overview.mdx)**: Mastra provides automated evaluation metrics that use model-graded, rule-based, and statistical methods to assess LLM outputs, with built-in metrics for toxicity, bias, relevance, and factual accuracy. You can also define your own evals.
-- **[Observability](/docs/observability/overview.mdx)**: Mastra provides specialized AI tracing to monitor LLM operations, agent decisions, and tool executions. Track token usage, latency, and conversation flows with native exporters for Langfuse, Braintrust, and Mastra Cloud. Structured logging provides additional debugging capabilities for comprehensive monitoring.
+We can't wait to see what you build ✌️
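To ground the agent and model-routing highlights above, here is a minimal sketch of defining and calling a Mastra agent. The agent name, instructions, and model choice are assumptions for illustration; the `@ai-sdk/openai` model helper matches the one used in the scorer examples later in this diff.

```typescript
import { openai } from "@ai-sdk/openai";
import { Agent } from "@mastra/core/agent";

// A small agent definition; name, instructions, and model are illustrative.
const weatherAgent = new Agent({
  name: "weather-agent",
  instructions: "Answer questions about the weather in one short, friendly sentence.",
  model: openai("gpt-4o-mini"),
});

// Ask a question and print the text of the reply.
const result = await weatherAgent.generate("What should I pack for a weekend in Berlin?");
console.log(result.text);
```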
package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx
CHANGED

@@ -57,6 +57,12 @@ interface OtelExporterConfig {
       description: "Logger level (default: 'warn')",
       required: false,
     },
+    {
+      name: "resourceAttributes",
+      type: "DetectedResourceAttributes",
+      description: "Optional OpenTelemetry Resource Attributes (values here override any defaults)",
+      required: false,
+    }
   ]}
 />

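As a rough illustration of the new option, the sketch below mirrors only the two fields documented above (`logLevel` and `resourceAttributes`). The local interface, the level union, and the attribute keys are assumptions, not the exporter's actual type definitions.

```typescript
// Local stand-in for the documented config shape; the real OtelExporterConfig
// lives in the Mastra OTEL exporter package and may differ in detail.
interface OtelExporterConfigSketch {
  logLevel?: "debug" | "info" | "warn" | "error"; // default per the docs: "warn"
  resourceAttributes?: Record<string, string | number | boolean>;
}

// Values placed in resourceAttributes override any detected defaults.
const config: OtelExporterConfigSketch = {
  logLevel: "warn",
  resourceAttributes: {
    "service.name": "my-mastra-app",
    "deployment.environment": "staging",
  },
};
```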
package/.docs/raw/reference/scorers/answer-relevancy.mdx
CHANGED

@@ -7,8 +7,6 @@ description: Documentation for the Answer Relevancy Scorer in Mastra, which eval

 The `createAnswerRelevancyScorer()` function accepts a single options object with the following properties:

-For usage example, see the [Answer Relevancy Examples](/examples/scorers/answer-relevancy).
-
 ## Parameters

 <PropertiesTable
@@ -103,11 +101,111 @@ The scorer evaluates relevancy through query-answer alignment, considering compl

 ### Score Interpretation

-
-
--
-- 0.
-- 0.0
+A relevancy score between 0 and 1:
+
+- **1.0**: The response fully answers the query with relevant and focused information.
+- **0.7–0.9**: The response mostly answers the query but may include minor unrelated content.
+- **0.4–0.6**: The response partially answers the query, mixing relevant and unrelated information.
+- **0.1–0.3**: The response includes minimal relevant content and largely misses the intent of the query.
+- **0.0**: The response is entirely unrelated and does not answer the query.
+
+## Examples
+
+### High relevancy example
+
+In this example, the response accurately addresses the input query with specific and relevant information.
+
+```typescript filename="src/example-high-answer-relevancy.ts" showLineNumbers copy
+import { openai } from "@ai-sdk/openai";
+import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
+
+const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
+
+const inputMessages = [{ role: 'user', content: "What are the health benefits of regular exercise?" }];
+const outputMessage = { text: "Regular exercise improves cardiovascular health, strengthens muscles, boosts metabolism, and enhances mental well-being through the release of endorphins." };
+
+const result = await scorer.run({
+  input: inputMessages,
+  output: outputMessage,
+});
+
+console.log(result);
+```
+
+#### High relevancy output
+
+The output receives a high score because it accurately answers the query without including unrelated information.
+
+```typescript
+{
+  score: 1,
+  reason: 'The score is 1 because the output directly addresses the question by providing multiple explicit health benefits of regular exercise, including improvements in cardiovascular health, muscle strength, metabolism, and mental well-being. Each point is relevant and contributes to a comprehensive understanding of the health benefits.'
+}
+```
+
+### Partial relevancy example
+
+In this example, the response addresses the query in part but includes additional information that isn’t directly relevant.
+
+```typescript filename="src/example-partial-answer-relevancy.ts" showLineNumbers copy
+import { openai } from "@ai-sdk/openai";
+import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
+
+const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
+
+const inputMessages = [{ role: 'user', content: "What should a healthy breakfast include?" }];
+const outputMessage = { text: "A nutritious breakfast should include whole grains and protein. However, the timing of your breakfast is just as important - studies show eating within 2 hours of waking optimizes metabolism and energy levels throughout the day." };
+
+const result = await scorer.run({
+  input: inputMessages,
+  output: outputMessage,
+});
+
+console.log(result);
+```
+
+#### Partial relevancy output
+
+The output receives a lower score because it partially answers the query. While some relevant information is included, unrelated details reduce the overall relevance.
+
+```typescript
+{
+  score: 0.25,
+  reason: 'The score is 0.25 because the output provides a direct answer by mentioning whole grains and protein as components of a healthy breakfast, which is relevant. However, the additional information about the timing of breakfast and its effects on metabolism and energy levels is not directly related to the question, leading to a lower overall relevance score.'
+}
+```
+
+## Low relevancy example
+
+In this example, the response does not address the query and contains information that is entirely unrelated.
+
+```typescript filename="src/example-low-answer-relevancy.ts" showLineNumbers copy
+import { openai } from "@ai-sdk/openai";
+import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
+
+const scorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
+
+const inputMessages = [{ role: 'user', content: "What are the benefits of meditation?" }];
+const outputMessage = { text: "The Great Wall of China is over 13,000 miles long and was built during the Ming Dynasty to protect against invasions." };
+
+const result = await scorer.run({
+  input: inputMessages,
+  output: outputMessage,
+});
+
+console.log(result);
+```
+
+#### Low relevancy output
+
+The output receives a score of 0 because it fails to answer the query or provide any relevant information.
+
+```typescript
+{
+  score: 0,
+  reason: 'The score is 0 because the output about the Great Wall of China is completely unrelated to the benefits of meditation, providing no relevant information or context that addresses the input question.'
+}
+```

 ## Related
