@mastra/mcp-docs-server 0.0.0-commonjs-20250414101718
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +161 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +110 -0
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +26 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmem0.md +196 -0
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +250 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +250 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
- package/.docs/organized/changelogs/create-mastra.md +302 -0
- package/.docs/organized/changelogs/mastra.md +302 -0
- package/.docs/organized/code-examples/agent-network.md +282 -0
- package/.docs/organized/code-examples/agent.md +388 -0
- package/.docs/organized/code-examples/ai-sdk-useChat.md +378 -0
- package/.docs/organized/code-examples/assistant-ui.md +37 -0
- package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
- package/.docs/organized/code-examples/client-side-tools.md +69 -0
- package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
- package/.docs/organized/code-examples/fireworks-r1.md +159 -0
- package/.docs/organized/code-examples/mcp-registry-registry.md +63 -0
- package/.docs/organized/code-examples/memory-todo-agent.md +164 -0
- package/.docs/organized/code-examples/memory-with-context.md +167 -0
- package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
- package/.docs/organized/code-examples/memory-with-mem0.md +121 -0
- package/.docs/organized/code-examples/memory-with-pg.md +224 -0
- package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
- package/.docs/organized/code-examples/quick-start.md +129 -0
- package/.docs/organized/code-examples/stock-price-tool.md +124 -0
- package/.docs/organized/code-examples/weather-agent.md +353 -0
- package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
- package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
- package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
- package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
- package/.docs/raw/agents/adding-tools.mdx +317 -0
- package/.docs/raw/agents/adding-voice.mdx +175 -0
- package/.docs/raw/agents/agent-memory.mdx +62 -0
- package/.docs/raw/agents/mcp-guide.mdx +215 -0
- package/.docs/raw/agents/overview.mdx +303 -0
- package/.docs/raw/community/discord.mdx +12 -0
- package/.docs/raw/community/licensing.mdx +63 -0
- package/.docs/raw/deployment/client.mdx +120 -0
- package/.docs/raw/deployment/deployment.mdx +127 -0
- package/.docs/raw/deployment/server.mdx +282 -0
- package/.docs/raw/evals/custom-eval.mdx +22 -0
- package/.docs/raw/evals/overview.mdx +95 -0
- package/.docs/raw/evals/running-in-ci.mdx +81 -0
- package/.docs/raw/evals/textual-evals.mdx +54 -0
- package/.docs/raw/faq/index.mdx +63 -0
- package/.docs/raw/frameworks/ai-sdk.mdx +296 -0
- package/.docs/raw/frameworks/next-js.mdx +238 -0
- package/.docs/raw/getting-started/installation.mdx +442 -0
- package/.docs/raw/getting-started/mcp-docs-server.mdx +141 -0
- package/.docs/raw/getting-started/project-structure.mdx +80 -0
- package/.docs/raw/index.mdx +22 -0
- package/.docs/raw/integrations/index.mdx +213 -0
- package/.docs/raw/local-dev/add-to-existing-project.mdx +48 -0
- package/.docs/raw/local-dev/creating-a-new-project.mdx +54 -0
- package/.docs/raw/local-dev/mastra-dev.mdx +108 -0
- package/.docs/raw/memory/memory-processors.mdx +131 -0
- package/.docs/raw/memory/overview.mdx +119 -0
- package/.docs/raw/memory/semantic-recall.mdx +122 -0
- package/.docs/raw/memory/working-memory.mdx +87 -0
- package/.docs/raw/observability/logging.mdx +38 -0
- package/.docs/raw/observability/nextjs-tracing.mdx +108 -0
- package/.docs/raw/observability/tracing.mdx +115 -0
- package/.docs/raw/rag/chunking-and-embedding.mdx +156 -0
- package/.docs/raw/rag/overview.mdx +85 -0
- package/.docs/raw/rag/retrieval.mdx +365 -0
- package/.docs/raw/rag/vector-databases.mdx +340 -0
- package/.docs/raw/reference/agents/createTool.mdx +229 -0
- package/.docs/raw/reference/agents/generate.mdx +334 -0
- package/.docs/raw/reference/agents/getAgent.mdx +54 -0
- package/.docs/raw/reference/agents/stream.mdx +369 -0
- package/.docs/raw/reference/cli/build.mdx +55 -0
- package/.docs/raw/reference/cli/dev.mdx +134 -0
- package/.docs/raw/reference/cli/init.mdx +43 -0
- package/.docs/raw/reference/client-js/agents.mdx +107 -0
- package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
- package/.docs/raw/reference/client-js/logs.mdx +24 -0
- package/.docs/raw/reference/client-js/memory.mdx +97 -0
- package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
- package/.docs/raw/reference/client-js/tools.mdx +44 -0
- package/.docs/raw/reference/client-js/vectors.mdx +79 -0
- package/.docs/raw/reference/client-js/workflows.mdx +136 -0
- package/.docs/raw/reference/core/mastra-class.mdx +232 -0
- package/.docs/raw/reference/deployer/cloudflare.mdx +207 -0
- package/.docs/raw/reference/deployer/deployer.mdx +159 -0
- package/.docs/raw/reference/deployer/netlify.mdx +109 -0
- package/.docs/raw/reference/deployer/vercel.mdx +117 -0
- package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
- package/.docs/raw/reference/evals/bias.mdx +186 -0
- package/.docs/raw/reference/evals/completeness.mdx +174 -0
- package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
- package/.docs/raw/reference/evals/context-position.mdx +190 -0
- package/.docs/raw/reference/evals/context-precision.mdx +189 -0
- package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
- package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
- package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
- package/.docs/raw/reference/evals/hallucination.mdx +219 -0
- package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
- package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
- package/.docs/raw/reference/evals/summarization.mdx +205 -0
- package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
- package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
- package/.docs/raw/reference/evals/toxicity.mdx +165 -0
- package/.docs/raw/reference/index.mdx +12 -0
- package/.docs/raw/reference/memory/Memory.mdx +212 -0
- package/.docs/raw/reference/memory/createThread.mdx +95 -0
- package/.docs/raw/reference/memory/getThreadById.mdx +46 -0
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +48 -0
- package/.docs/raw/reference/memory/query.mdx +167 -0
- package/.docs/raw/reference/networks/agent-network.mdx +159 -0
- package/.docs/raw/reference/observability/create-logger.mdx +106 -0
- package/.docs/raw/reference/observability/logger.mdx +55 -0
- package/.docs/raw/reference/observability/otel-config.mdx +120 -0
- package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
- package/.docs/raw/reference/observability/providers/dash0.mdx +40 -0
- package/.docs/raw/reference/observability/providers/index.mdx +16 -0
- package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
- package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
- package/.docs/raw/reference/observability/providers/langsmith.mdx +48 -0
- package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
- package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
- package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
- package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
- package/.docs/raw/reference/rag/astra.mdx +258 -0
- package/.docs/raw/reference/rag/chroma.mdx +281 -0
- package/.docs/raw/reference/rag/chunk.mdx +235 -0
- package/.docs/raw/reference/rag/document.mdx +127 -0
- package/.docs/raw/reference/rag/embeddings.mdx +160 -0
- package/.docs/raw/reference/rag/extract-params.mdx +226 -0
- package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
- package/.docs/raw/reference/rag/libsql.mdx +357 -0
- package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
- package/.docs/raw/reference/rag/pg.mdx +477 -0
- package/.docs/raw/reference/rag/pinecone.mdx +281 -0
- package/.docs/raw/reference/rag/qdrant.mdx +236 -0
- package/.docs/raw/reference/rag/rerank.mdx +212 -0
- package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
- package/.docs/raw/reference/rag/upstash.mdx +247 -0
- package/.docs/raw/reference/rag/vectorize.mdx +298 -0
- package/.docs/raw/reference/storage/libsql.mdx +74 -0
- package/.docs/raw/reference/storage/postgresql.mdx +48 -0
- package/.docs/raw/reference/storage/upstash.mdx +86 -0
- package/.docs/raw/reference/tools/client.mdx +207 -0
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
- package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
- package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
- package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
- package/.docs/raw/reference/voice/deepgram.mdx +164 -0
- package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
- package/.docs/raw/reference/voice/google.mdx +198 -0
- package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
- package/.docs/raw/reference/voice/murf.mdx +251 -0
- package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
- package/.docs/raw/reference/voice/openai.mdx +168 -0
- package/.docs/raw/reference/voice/playai.mdx +159 -0
- package/.docs/raw/reference/voice/sarvam.mdx +260 -0
- package/.docs/raw/reference/voice/speechify.mdx +145 -0
- package/.docs/raw/reference/voice/voice.answer.mdx +122 -0
- package/.docs/raw/reference/voice/voice.connect.mdx +124 -0
- package/.docs/raw/reference/voice/voice.listen.mdx +195 -0
- package/.docs/raw/reference/voice/voice.on.mdx +189 -0
- package/.docs/raw/reference/voice/voice.send.mdx +118 -0
- package/.docs/raw/reference/voice/voice.speak.mdx +203 -0
- package/.docs/raw/reference/workflows/after.mdx +88 -0
- package/.docs/raw/reference/workflows/afterEvent.mdx +76 -0
- package/.docs/raw/reference/workflows/commit.mdx +37 -0
- package/.docs/raw/reference/workflows/createRun.mdx +77 -0
- package/.docs/raw/reference/workflows/else.mdx +72 -0
- package/.docs/raw/reference/workflows/events.mdx +305 -0
- package/.docs/raw/reference/workflows/execute.mdx +110 -0
- package/.docs/raw/reference/workflows/if.mdx +107 -0
- package/.docs/raw/reference/workflows/resume.mdx +155 -0
- package/.docs/raw/reference/workflows/resumeWithEvent.mdx +133 -0
- package/.docs/raw/reference/workflows/snapshots.mdx +207 -0
- package/.docs/raw/reference/workflows/start.mdx +84 -0
- package/.docs/raw/reference/workflows/step-class.mdx +100 -0
- package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
- package/.docs/raw/reference/workflows/step-function.mdx +92 -0
- package/.docs/raw/reference/workflows/step-options.mdx +69 -0
- package/.docs/raw/reference/workflows/step-retries.mdx +203 -0
- package/.docs/raw/reference/workflows/suspend.mdx +70 -0
- package/.docs/raw/reference/workflows/then.mdx +74 -0
- package/.docs/raw/reference/workflows/until.mdx +165 -0
- package/.docs/raw/reference/workflows/watch.mdx +118 -0
- package/.docs/raw/reference/workflows/while.mdx +168 -0
- package/.docs/raw/reference/workflows/workflow.mdx +233 -0
- package/.docs/raw/storage/overview.mdx +378 -0
- package/.docs/raw/voice/overview.mdx +135 -0
- package/.docs/raw/voice/speech-to-text.mdx +45 -0
- package/.docs/raw/voice/text-to-speech.mdx +52 -0
- package/.docs/raw/voice/voice-to-voice.mdx +310 -0
- package/.docs/raw/workflows/control-flow.mdx +778 -0
- package/.docs/raw/workflows/dynamic-workflows.mdx +236 -0
- package/.docs/raw/workflows/error-handling.mdx +183 -0
- package/.docs/raw/workflows/nested-workflows.mdx +352 -0
- package/.docs/raw/workflows/overview.mdx +203 -0
- package/.docs/raw/workflows/steps.mdx +108 -0
- package/.docs/raw/workflows/suspend-and-resume.mdx +404 -0
- package/.docs/raw/workflows/variables.mdx +313 -0
- package/LICENSE.md +46 -0
- package/README.md +129 -0
- package/dist/_tsup-dts-rollup.d.ts +149 -0
- package/dist/chunk-QWYMT5LP.js +194 -0
- package/dist/prepare-docs/prepare.d.ts +1 -0
- package/dist/prepare-docs/prepare.js +1 -0
- package/dist/stdio.d.ts +1 -0
- package/dist/stdio.js +518 -0
- package/package.json +60 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Context Relevancy | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Context Relevancy Metric, which evaluates the relevance of retrieved context in RAG pipelines.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ContextRelevancyMetric
|
|
7
|
+
|
|
8
|
+
The `ContextRelevancyMetric` class evaluates the quality of your RAG (Retrieval-Augmented Generation) pipeline's retriever by measuring how relevant the retrieved context is to the input query. It uses an LLM-based evaluation system that first extracts statements from the context and then assesses their relevance to the input.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { openai } from "@ai-sdk/openai";
|
|
14
|
+
import { ContextRelevancyMetric } from "@mastra/evals/llm";
|
|
15
|
+
|
|
16
|
+
// Configure the model for evaluation
|
|
17
|
+
const model = openai("gpt-4o-mini");
|
|
18
|
+
|
|
19
|
+
const metric = new ContextRelevancyMetric(model, {
|
|
20
|
+
context: [
|
|
21
|
+
"All data is encrypted at rest and in transit",
|
|
22
|
+
"Two-factor authentication is mandatory",
|
|
23
|
+
"The platform supports multiple languages",
|
|
24
|
+
"Our offices are located in San Francisco"
|
|
25
|
+
]
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
const result = await metric.measure(
|
|
29
|
+
"What are our product's security features?",
|
|
30
|
+
"Our product uses encryption and requires 2FA.",
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
console.log(result.score); // Score from 0-1
|
|
34
|
+
console.log(result.info.reason); // Explanation of the relevancy assessment
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Constructor Parameters
|
|
38
|
+
|
|
39
|
+
<PropertiesTable
|
|
40
|
+
content={[
|
|
41
|
+
{
|
|
42
|
+
name: "model",
|
|
43
|
+
type: "LanguageModel",
|
|
44
|
+
description: "Configuration for the model used to evaluate context relevancy",
|
|
45
|
+
isOptional: false,
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
name: "options",
|
|
49
|
+
type: "ContextRelevancyMetricOptions",
|
|
50
|
+
description: "Configuration options for the metric",
|
|
51
|
+
isOptional: false,
|
|
52
|
+
}
|
|
53
|
+
]}
|
|
54
|
+
/>
|
|
55
|
+
|
|
56
|
+
### ContextRelevancyMetricOptions
|
|
57
|
+
|
|
58
|
+
<PropertiesTable
|
|
59
|
+
content={[
|
|
60
|
+
{
|
|
61
|
+
name: "scale",
|
|
62
|
+
type: "number",
|
|
63
|
+
description: "Maximum score value",
|
|
64
|
+
isOptional: true,
|
|
65
|
+
defaultValue: "1",
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
name: "context",
|
|
69
|
+
type: "string[]",
|
|
70
|
+
description: "Array of retrieved context documents used to generate the response",
|
|
71
|
+
isOptional: false,
|
|
72
|
+
}
|
|
73
|
+
]}
|
|
74
|
+
/>
|
|
75
|
+
|
|
76
|
+
## measure() Parameters
|
|
77
|
+
|
|
78
|
+
<PropertiesTable
|
|
79
|
+
content={[
|
|
80
|
+
{
|
|
81
|
+
name: "input",
|
|
82
|
+
type: "string",
|
|
83
|
+
description: "The original query or prompt",
|
|
84
|
+
isOptional: false,
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
name: "output",
|
|
88
|
+
type: "string",
|
|
89
|
+
description: "The LLM's response to evaluate",
|
|
90
|
+
isOptional: false,
|
|
91
|
+
}
|
|
92
|
+
]}
|
|
93
|
+
/>
|
|
94
|
+
|
|
95
|
+
## Returns
|
|
96
|
+
|
|
97
|
+
<PropertiesTable
|
|
98
|
+
content={[
|
|
99
|
+
{
|
|
100
|
+
name: "score",
|
|
101
|
+
type: "number",
|
|
102
|
+
description: "Context relevancy score (0 to scale, default 0-1)",
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
name: "info",
|
|
106
|
+
type: "object",
|
|
107
|
+
description: "Object containing the reason for the score",
|
|
108
|
+
properties: [
|
|
109
|
+
{
|
|
110
|
+
type: "string",
|
|
111
|
+
parameters: [
|
|
112
|
+
{
|
|
113
|
+
name: "reason",
|
|
114
|
+
type: "string",
|
|
115
|
+
description: "Detailed explanation of the relevancy assessment",
|
|
116
|
+
}
|
|
117
|
+
]
|
|
118
|
+
}
|
|
119
|
+
]
|
|
120
|
+
}
|
|
121
|
+
]}
|
|
122
|
+
/>
|
|
123
|
+
|
|
124
|
+
## Scoring Details
|
|
125
|
+
|
|
126
|
+
The metric evaluates how well retrieved context matches the query through binary relevance classification.
|
|
127
|
+
|
|
128
|
+
### Scoring Process
|
|
129
|
+
|
|
130
|
+
1. Extracts statements from context:
|
|
131
|
+
- Breaks down context into meaningful units
|
|
132
|
+
- Preserves semantic relationships
|
|
133
|
+
|
|
134
|
+
2. Evaluates statement relevance:
|
|
135
|
+
- Assesses each statement against query
|
|
136
|
+
- Counts relevant statements
|
|
137
|
+
- Calculates relevance ratio
|
|
138
|
+
|
|
139
|
+
Final score: `(relevant_statements / total_statements) * scale`
|
|
140
|
+
|
|
141
|
+
### Score interpretation
|
|
142
|
+
(0 to scale, default 0-1)
|
|
143
|
+
- 1.0: Perfect relevancy - all retrieved context is relevant
|
|
144
|
+
- 0.7-0.9: High relevancy - most context is relevant with few irrelevant pieces
|
|
145
|
+
- 0.4-0.6: Moderate relevancy - a mix of relevant and irrelevant context
|
|
146
|
+
- 0.1-0.3: Low relevancy - mostly irrelevant context
|
|
147
|
+
- 0.0: No relevancy - completely irrelevant context
|
|
148
|
+
|
|
149
|
+
## Example with Custom Configuration
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
import { openai } from "@ai-sdk/openai";
|
|
153
|
+
import { ContextRelevancyMetric } from "@mastra/evals/llm";
|
|
154
|
+
|
|
155
|
+
// Configure the model for evaluation
|
|
156
|
+
const model = openai("gpt-4o-mini");
|
|
157
|
+
|
|
158
|
+
const metric = new ContextRelevancyMetric(model, {
|
|
159
|
+
scale: 100, // Use 0-100 scale instead of 0-1
|
|
160
|
+
context: [
|
|
161
|
+
"Basic plan costs $10/month",
|
|
162
|
+
"Pro plan includes advanced features at $30/month",
|
|
163
|
+
"Enterprise plan has custom pricing",
|
|
164
|
+
"Our company was founded in 2020",
|
|
165
|
+
"We have offices worldwide"
|
|
166
|
+
]
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
const result = await metric.measure(
|
|
170
|
+
"What are our pricing plans?",
|
|
171
|
+
"We offer Basic, Pro, and Enterprise plans.",
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
// Example output:
|
|
175
|
+
// {
|
|
176
|
+
// score: 60,
|
|
177
|
+
// info: {
|
|
178
|
+
// reason: "3 out of 5 statements are relevant to pricing plans. The statements about
|
|
179
|
+
// company founding and office locations are not relevant to the pricing query."
|
|
180
|
+
// }
|
|
181
|
+
// }
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Related
|
|
185
|
+
|
|
186
|
+
- [Contextual Recall Metric](./contextual-recall)
|
|
187
|
+
- [Context Precision Metric](./context-precision)
|
|
188
|
+
- [Context Position Metric](./context-position)
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Contextual Recall | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Contextual Recall Metric, which evaluates the completeness of LLM responses in incorporating relevant context.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ContextualRecallMetric
|
|
7
|
+
|
|
8
|
+
The `ContextualRecallMetric` class evaluates how effectively an LLM's response incorporates all relevant information from the provided context. It measures whether important information from the reference documents was successfully included in the response, focusing on completeness rather than precision.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { openai } from "@ai-sdk/openai";
|
|
14
|
+
import { ContextualRecallMetric } from "@mastra/evals/llm";
|
|
15
|
+
|
|
16
|
+
// Configure the model for evaluation
|
|
17
|
+
const model = openai("gpt-4o-mini");
|
|
18
|
+
|
|
19
|
+
const metric = new ContextualRecallMetric(model, {
|
|
20
|
+
context: [
|
|
21
|
+
"Product features: cloud synchronization capability",
|
|
22
|
+
"Offline mode available for all users",
|
|
23
|
+
"Supports multiple devices simultaneously",
|
|
24
|
+
"End-to-end encryption for all data"
|
|
25
|
+
]
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
const result = await metric.measure(
|
|
29
|
+
"What are the key features of the product?",
|
|
30
|
+
"The product includes cloud sync, offline mode, and multi-device support.",
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
console.log(result.score); // Score from 0-1
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Constructor Parameters
|
|
37
|
+
|
|
38
|
+
<PropertiesTable
|
|
39
|
+
content={[
|
|
40
|
+
{
|
|
41
|
+
name: "model",
|
|
42
|
+
type: "LanguageModel",
|
|
43
|
+
description: "Configuration for the model used to evaluate contextual recall",
|
|
44
|
+
isOptional: false,
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "options",
|
|
48
|
+
type: "ContextualRecallMetricOptions",
|
|
49
|
+
description: "Configuration options for the metric",
|
|
50
|
+
isOptional: false,
|
|
51
|
+
}
|
|
52
|
+
]}
|
|
53
|
+
/>
|
|
54
|
+
|
|
55
|
+
### ContextualRecallMetricOptions
|
|
56
|
+
|
|
57
|
+
<PropertiesTable
|
|
58
|
+
content={[
|
|
59
|
+
{
|
|
60
|
+
name: "scale",
|
|
61
|
+
type: "number",
|
|
62
|
+
description: "Maximum score value",
|
|
63
|
+
isOptional: true,
|
|
64
|
+
defaultValue: "1",
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: "context",
|
|
68
|
+
type: "string[]",
|
|
69
|
+
description: "Array of reference documents or pieces of information to check against",
|
|
70
|
+
isOptional: false,
|
|
71
|
+
}
|
|
72
|
+
]}
|
|
73
|
+
/>
|
|
74
|
+
|
|
75
|
+
## measure() Parameters
|
|
76
|
+
|
|
77
|
+
<PropertiesTable
|
|
78
|
+
content={[
|
|
79
|
+
{
|
|
80
|
+
name: "input",
|
|
81
|
+
type: "string",
|
|
82
|
+
description: "The original query or prompt",
|
|
83
|
+
isOptional: false,
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
name: "output",
|
|
87
|
+
type: "string",
|
|
88
|
+
description: "The LLM's response to evaluate",
|
|
89
|
+
isOptional: false,
|
|
90
|
+
}
|
|
91
|
+
]}
|
|
92
|
+
/>
|
|
93
|
+
|
|
94
|
+
## Returns
|
|
95
|
+
|
|
96
|
+
<PropertiesTable
|
|
97
|
+
content={[
|
|
98
|
+
{
|
|
99
|
+
name: "score",
|
|
100
|
+
type: "number",
|
|
101
|
+
description: "Recall score (0 to scale, default 0-1)",
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
name: "info",
|
|
105
|
+
type: "object",
|
|
106
|
+
description: "Object containing the reason for the score",
|
|
107
|
+
properties: [
|
|
108
|
+
{
|
|
109
|
+
type: "string",
|
|
110
|
+
parameters: [
|
|
111
|
+
{
|
|
112
|
+
name: "reason",
|
|
113
|
+
type: "string",
|
|
114
|
+
description: "Detailed explanation of the score",
|
|
115
|
+
}
|
|
116
|
+
]
|
|
117
|
+
}
|
|
118
|
+
]
|
|
119
|
+
}
|
|
120
|
+
]}
|
|
121
|
+
/>
|
|
122
|
+
|
|
123
|
+
## Scoring Details
|
|
124
|
+
|
|
125
|
+
The metric evaluates recall through comparison of response content against relevant context items.
|
|
126
|
+
|
|
127
|
+
### Scoring Process
|
|
128
|
+
|
|
129
|
+
1. Evaluates information recall:
|
|
130
|
+
- Identifies relevant items in context
|
|
131
|
+
- Tracks correctly recalled information
|
|
132
|
+
- Measures completeness of recall
|
|
133
|
+
|
|
134
|
+
2. Calculates recall score:
|
|
135
|
+
- Counts correctly recalled items
|
|
136
|
+
- Compares against total relevant items
|
|
137
|
+
- Computes coverage ratio
|
|
138
|
+
|
|
139
|
+
Final score: `(correctly_recalled_items / total_relevant_items) * scale`
|
|
140
|
+
|
|
141
|
+
### Score interpretation
|
|
142
|
+
(0 to scale, default 0-1)
|
|
143
|
+
- 1.0: Perfect recall - all relevant information included
|
|
144
|
+
- 0.7-0.9: High recall - most relevant information included
|
|
145
|
+
- 0.4-0.6: Moderate recall - some relevant information missed
|
|
146
|
+
- 0.1-0.3: Low recall - significant information missed
|
|
147
|
+
- 0.0: No recall - no relevant information included
|
|
148
|
+
|
|
149
|
+
## Example with Custom Configuration
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
import { openai } from "@ai-sdk/openai";
|
|
153
|
+
import { ContextualRecallMetric } from "@mastra/evals/llm";
|
|
154
|
+
|
|
155
|
+
// Configure the model for evaluation
|
|
156
|
+
const model = openai("gpt-4o-mini");
|
|
157
|
+
|
|
158
|
+
const metric = new ContextualRecallMetric(
|
|
159
|
+
model,
|
|
160
|
+
{
|
|
161
|
+
scale: 100, // Use 0-100 scale instead of 0-1
|
|
162
|
+
context: [
|
|
163
|
+
"All data is encrypted at rest and in transit",
|
|
164
|
+
"Two-factor authentication (2FA) is mandatory",
|
|
165
|
+
"Regular security audits are performed",
|
|
166
|
+
"Incident response team available 24/7"
|
|
167
|
+
]
|
|
168
|
+
}
|
|
169
|
+
);
|
|
170
|
+
|
|
171
|
+
const result = await metric.measure(
|
|
172
|
+
"Summarize the company's security measures",
|
|
173
|
+
"The company implements encryption for data protection and requires 2FA for all users.",
|
|
174
|
+
);
|
|
175
|
+
|
|
176
|
+
// Example output:
|
|
177
|
+
// {
|
|
178
|
+
// score: 50, // Only half of the security measures were mentioned
|
|
179
|
+
// info: {
|
|
180
|
+
// reason: "The score is 50 because only half of the security measures were mentioned
|
|
181
|
+
// in the response. The response missed the regular security audits and incident
|
|
182
|
+
// response team information."
|
|
183
|
+
// }
|
|
184
|
+
// }
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Related
|
|
188
|
+
|
|
189
|
+
- [Context Relevancy Metric](./context-relevancy)
|
|
190
|
+
- [Completeness Metric](./completeness)
|
|
191
|
+
- [Summarization Metric](./summarization)
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Faithfulness | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Faithfulness Metric in Mastra, which evaluates the factual accuracy of LLM outputs compared to the provided context.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# FaithfulnessMetric Reference
|
|
7
|
+
|
|
8
|
+
The `FaithfulnessMetric` in Mastra evaluates how factually accurate an LLM's output is compared to the provided context. It extracts claims from the output and verifies them against the context, which makes it essential for measuring the reliability of RAG pipeline responses.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { openai } from "@ai-sdk/openai";
|
|
14
|
+
import { FaithfulnessMetric } from "@mastra/evals/llm";
|
|
15
|
+
|
|
16
|
+
// Configure the model for evaluation
|
|
17
|
+
const model = openai("gpt-4o-mini");
|
|
18
|
+
|
|
19
|
+
const metric = new FaithfulnessMetric(model, {
|
|
20
|
+
context: [
|
|
21
|
+
"The company was established in 1995.",
|
|
22
|
+
"Currently employs around 450-550 people.",
|
|
23
|
+
],
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
const result = await metric.measure(
|
|
27
|
+
"Tell me about the company.",
|
|
28
|
+
"The company was founded in 1995 and has 500 employees.",
|
|
29
|
+
);
|
|
30
|
+
|
|
31
|
+
console.log(result.score); // 1.0
|
|
32
|
+
console.log(result.info.reason); // "All claims are supported by the context."
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Constructor Parameters
|
|
36
|
+
|
|
37
|
+
<PropertiesTable
|
|
38
|
+
content={[
|
|
39
|
+
{
|
|
40
|
+
name: "model",
|
|
41
|
+
type: "LanguageModel",
|
|
42
|
+
description: "Configuration for the model used to evaluate faithfulness.",
|
|
43
|
+
isOptional: false,
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "options",
|
|
47
|
+
type: "FaithfulnessMetricOptions",
|
|
48
|
+
description: "Additional options for configuring the metric.",
|
|
49
|
+
isOptional: false,
|
|
50
|
+
},
|
|
51
|
+
]}
|
|
52
|
+
/>
|
|
53
|
+
|
|
54
|
+
### FaithfulnessMetricOptions
|
|
55
|
+
|
|
56
|
+
<PropertiesTable
|
|
57
|
+
content={[
|
|
58
|
+
{
|
|
59
|
+
name: "scale",
|
|
60
|
+
type: "number",
|
|
61
|
+
description:
|
|
62
|
+
"The maximum score value. The final score will be normalized to this scale.",
|
|
63
|
+
isOptional: true,
|
|
64
|
+
defaultValue: "1",
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: "context",
|
|
68
|
+
type: "string[]",
|
|
69
|
+
description:
|
|
70
|
+
"Array of context chunks against which the output's claims will be verified.",
|
|
71
|
+
isOptional: false,
|
|
72
|
+
},
|
|
73
|
+
]}
|
|
74
|
+
/>
|
|
75
|
+
|
|
76
|
+
## measure() Parameters
|
|
77
|
+
|
|
78
|
+
<PropertiesTable
|
|
79
|
+
content={[
|
|
80
|
+
{
|
|
81
|
+
name: "input",
|
|
82
|
+
type: "string",
|
|
83
|
+
description: "The original query or prompt given to the LLM.",
|
|
84
|
+
isOptional: false,
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
name: "output",
|
|
88
|
+
type: "string",
|
|
89
|
+
description: "The LLM's response to be evaluated for faithfulness.",
|
|
90
|
+
isOptional: false,
|
|
91
|
+
},
|
|
92
|
+
]}
|
|
93
|
+
/>
|
|
94
|
+
|
|
95
|
+
## Returns
|
|
96
|
+
|
|
97
|
+
<PropertiesTable
|
|
98
|
+
content={[
|
|
99
|
+
{
|
|
100
|
+
name: "score",
|
|
101
|
+
type: "number",
|
|
102
|
+
description:
|
|
103
|
+
"A score between 0 and the configured scale, representing the proportion of claims that are supported by the context.",
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
name: "info",
|
|
107
|
+
type: "object",
|
|
108
|
+
description: "Object containing the reason for the score",
|
|
109
|
+
properties: [
|
|
110
|
+
{
|
|
111
|
+
type: "string",
|
|
112
|
+
parameters: [
|
|
113
|
+
{
|
|
114
|
+
name: "reason",
|
|
115
|
+
type: "string",
|
|
116
|
+
description:
|
|
117
|
+
"A detailed explanation of the score, including which claims were supported, contradicted, or marked as unsure.",
|
|
118
|
+
},
|
|
119
|
+
],
|
|
120
|
+
},
|
|
121
|
+
],
|
|
122
|
+
},
|
|
123
|
+
]}
|
|
124
|
+
/>
|
|
125
|
+
|
|
126
|
+
## Scoring Details
|
|
127
|
+
|
|
128
|
+
The metric evaluates faithfulness through claim verification against provided context.
|
|
129
|
+
|
|
130
|
+
### Scoring Process
|
|
131
|
+
|
|
132
|
+
1. Analyzes claims and context:
|
|
133
|
+
- Extracts all claims (factual and speculative)
|
|
134
|
+
- Verifies each claim against context
|
|
135
|
+
- Assigns one of three verdicts:
|
|
136
|
+
- "yes" - claim supported by context
|
|
137
|
+
- "no" - claim contradicts context
|
|
138
|
+
- "unsure" - claim unverifiable
|
|
139
|
+
|
|
140
|
+
2. Calculates faithfulness score:
|
|
141
|
+
- Counts supported claims
|
|
142
|
+
- Divides by total claims
|
|
143
|
+
- Scales to configured range
|
|
144
|
+
|
|
145
|
+
Final score: `(supported_claims / total_claims) * scale`
|
|
146
|
+
|
|
147
|
+
### Score interpretation
|
|
148
|
+
(0 to scale, default 0-1)
|
|
149
|
+
- 1.0: All claims supported by context
|
|
150
|
+
- 0.7-0.9: Most claims supported, few unverifiable
|
|
151
|
+
- 0.4-0.6: Mixed support with some contradictions
|
|
152
|
+
- 0.1-0.3: Limited support, many contradictions
|
|
153
|
+
- 0.0: No supported claims
|
|
154
|
+
|
|
155
|
+
## Advanced Example
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
import { openai } from "@ai-sdk/openai";
|
|
159
|
+
import { FaithfulnessMetric } from "@mastra/evals/llm";
|
|
160
|
+
|
|
161
|
+
// Configure the model for evaluation
|
|
162
|
+
const model = openai("gpt-4o-mini");
|
|
163
|
+
|
|
164
|
+
const metric = new FaithfulnessMetric(model, {
|
|
165
|
+
context: [
|
|
166
|
+
"The company had 100 employees in 2020.",
|
|
167
|
+
"Current employee count is approximately 500.",
|
|
168
|
+
],
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
// Example with mixed claim types
|
|
172
|
+
const result = await metric.measure(
|
|
173
|
+
"What's the company's growth like?",
|
|
174
|
+
"The company has grown from 100 employees in 2020 to 500 now, and might expand to 1000 by next year.",
|
|
175
|
+
);
|
|
176
|
+
|
|
177
|
+
// Example output:
|
|
178
|
+
// {
|
|
179
|
+
// score: 0.67,
|
|
180
|
+
// info: {
|
|
181
|
+
// reason: "The score is 0.67 because two claims are supported by the context
|
|
182
|
+
// (initial employee count of 100 in 2020 and current count of 500),
|
|
183
|
+
// while the future expansion claim is marked as unsure as it cannot
|
|
184
|
+
// be verified against the context."
|
|
185
|
+
// }
|
|
186
|
+
// }
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Related
|
|
190
|
+
|
|
191
|
+
- [Answer Relevancy Metric](./answer-relevancy)
|
|
192
|
+
- [Hallucination Metric](./hallucination)
|
|
193
|
+
- [Context Relevancy Metric](./context-relevancy)
|