@mastra/mcp-docs-server 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fcomposio.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +11 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fstabilityai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +59 -0
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +24 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +12 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
- package/.docs/organized/changelogs/create-mastra.md +302 -0
- package/.docs/organized/changelogs/mastra.md +302 -0
- package/.docs/organized/code-examples/agent.md +385 -0
- package/.docs/organized/code-examples/ai-sdk-useChat.md +377 -0
- package/.docs/organized/code-examples/assistant-ui.md +37 -0
- package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
- package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
- package/.docs/organized/code-examples/fireworks-r1.md +159 -0
- package/.docs/organized/code-examples/integrations.md +184 -0
- package/.docs/organized/code-examples/mcp-configuration.md +341 -0
- package/.docs/organized/code-examples/memory-todo-agent.md +161 -0
- package/.docs/organized/code-examples/memory-with-context.md +167 -0
- package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
- package/.docs/organized/code-examples/memory-with-pg.md +224 -0
- package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
- package/.docs/organized/code-examples/quick-start.md +127 -0
- package/.docs/organized/code-examples/stock-price-tool.md +124 -0
- package/.docs/organized/code-examples/weather-agent.md +352 -0
- package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
- package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
- package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
- package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
- package/.docs/raw/agents/00-overview.mdx +185 -0
- package/.docs/raw/agents/01-agent-memory.mdx +610 -0
- package/.docs/raw/agents/02-adding-tools.mdx +224 -0
- package/.docs/raw/agents/03-adding-voice.mdx +170 -0
- package/.docs/raw/deployment/deployment.mdx +156 -0
- package/.docs/raw/deployment/logging-and-tracing.mdx +242 -0
- package/.docs/raw/deployment/server.mdx +114 -0
- package/.docs/raw/evals/00-overview.mdx +106 -0
- package/.docs/raw/evals/01-supported-evals.mdx +31 -0
- package/.docs/raw/evals/02-custom-eval.mdx +187 -0
- package/.docs/raw/faq/index.mdx +63 -0
- package/.docs/raw/frameworks/01-next-js.mdx +238 -0
- package/.docs/raw/frameworks/02-ai-sdk.mdx +218 -0
- package/.docs/raw/getting-started/installation.mdx +436 -0
- package/.docs/raw/getting-started/project-structure.mdx +80 -0
- package/.docs/raw/guides/01-chef-michel.mdx +242 -0
- package/.docs/raw/guides/02-stock-agent.mdx +182 -0
- package/.docs/raw/guides/03-recruiter.mdx +187 -0
- package/.docs/raw/index.mdx +22 -0
- package/.docs/raw/local-dev/creating-projects.mdx +74 -0
- package/.docs/raw/local-dev/integrations.mdx +127 -0
- package/.docs/raw/local-dev/mastra-dev.mdx +65 -0
- package/.docs/raw/rag/chunking-and-embedding.mdx +128 -0
- package/.docs/raw/rag/overview.mdx +85 -0
- package/.docs/raw/rag/retrieval.mdx +362 -0
- package/.docs/raw/rag/vector-databases.mdx +271 -0
- package/.docs/raw/reference/agents/createTool.mdx +190 -0
- package/.docs/raw/reference/agents/generate.mdx +327 -0
- package/.docs/raw/reference/agents/getAgent.mdx +54 -0
- package/.docs/raw/reference/agents/stream.mdx +361 -0
- package/.docs/raw/reference/cli/build.mdx +48 -0
- package/.docs/raw/reference/cli/deploy.mdx +22 -0
- package/.docs/raw/reference/cli/dev.mdx +97 -0
- package/.docs/raw/reference/cli/init.mdx +43 -0
- package/.docs/raw/reference/client-js/agents.mdx +90 -0
- package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
- package/.docs/raw/reference/client-js/index.mdx +127 -0
- package/.docs/raw/reference/client-js/logs.mdx +24 -0
- package/.docs/raw/reference/client-js/memory.mdx +94 -0
- package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
- package/.docs/raw/reference/client-js/tools.mdx +44 -0
- package/.docs/raw/reference/client-js/vectors.mdx +79 -0
- package/.docs/raw/reference/client-js/workflows.mdx +137 -0
- package/.docs/raw/reference/core/mastra-class.mdx +232 -0
- package/.docs/raw/reference/deployer/cloudflare.mdx +176 -0
- package/.docs/raw/reference/deployer/deployer.mdx +159 -0
- package/.docs/raw/reference/deployer/netlify.mdx +88 -0
- package/.docs/raw/reference/deployer/vercel.mdx +97 -0
- package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
- package/.docs/raw/reference/evals/bias.mdx +186 -0
- package/.docs/raw/reference/evals/completeness.mdx +174 -0
- package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
- package/.docs/raw/reference/evals/context-position.mdx +190 -0
- package/.docs/raw/reference/evals/context-precision.mdx +189 -0
- package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
- package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
- package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
- package/.docs/raw/reference/evals/hallucination.mdx +219 -0
- package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
- package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
- package/.docs/raw/reference/evals/summarization.mdx +205 -0
- package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
- package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
- package/.docs/raw/reference/evals/toxicity.mdx +165 -0
- package/.docs/raw/reference/index.mdx +8 -0
- package/.docs/raw/reference/memory/Memory.mdx +186 -0
- package/.docs/raw/reference/memory/createThread.mdx +93 -0
- package/.docs/raw/reference/memory/getThreadById.mdx +43 -0
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +45 -0
- package/.docs/raw/reference/memory/query.mdx +164 -0
- package/.docs/raw/reference/observability/create-logger.mdx +106 -0
- package/.docs/raw/reference/observability/logger.mdx +55 -0
- package/.docs/raw/reference/observability/otel-config.mdx +120 -0
- package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
- package/.docs/raw/reference/observability/providers/index.mdx +15 -0
- package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
- package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
- package/.docs/raw/reference/observability/providers/langsmith.mdx +46 -0
- package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
- package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
- package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
- package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
- package/.docs/raw/reference/rag/astra.mdx +258 -0
- package/.docs/raw/reference/rag/chroma.mdx +281 -0
- package/.docs/raw/reference/rag/chunk.mdx +237 -0
- package/.docs/raw/reference/rag/document.mdx +129 -0
- package/.docs/raw/reference/rag/embeddings.mdx +160 -0
- package/.docs/raw/reference/rag/extract-params.mdx +72 -0
- package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
- package/.docs/raw/reference/rag/libsql.mdx +357 -0
- package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
- package/.docs/raw/reference/rag/pg.mdx +477 -0
- package/.docs/raw/reference/rag/pinecone.mdx +249 -0
- package/.docs/raw/reference/rag/qdrant.mdx +236 -0
- package/.docs/raw/reference/rag/rerank.mdx +212 -0
- package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
- package/.docs/raw/reference/rag/upstash.mdx +198 -0
- package/.docs/raw/reference/rag/vectorize.mdx +253 -0
- package/.docs/raw/reference/storage/libsql.mdx +74 -0
- package/.docs/raw/reference/storage/postgresql.mdx +48 -0
- package/.docs/raw/reference/storage/upstash.mdx +86 -0
- package/.docs/raw/reference/tools/client.mdx +180 -0
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
- package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
- package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
- package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
- package/.docs/raw/reference/voice/deepgram.mdx +164 -0
- package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
- package/.docs/raw/reference/voice/google.mdx +198 -0
- package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
- package/.docs/raw/reference/voice/murf.mdx +251 -0
- package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
- package/.docs/raw/reference/voice/openai.mdx +168 -0
- package/.docs/raw/reference/voice/playai.mdx +159 -0
- package/.docs/raw/reference/voice/speechify.mdx +145 -0
- package/.docs/raw/reference/workflows/after.mdx +88 -0
- package/.docs/raw/reference/workflows/commit.mdx +37 -0
- package/.docs/raw/reference/workflows/createRun.mdx +77 -0
- package/.docs/raw/reference/workflows/else.mdx +72 -0
- package/.docs/raw/reference/workflows/execute.mdx +110 -0
- package/.docs/raw/reference/workflows/if.mdx +107 -0
- package/.docs/raw/reference/workflows/resume.mdx +155 -0
- package/.docs/raw/reference/workflows/start.mdx +84 -0
- package/.docs/raw/reference/workflows/step-class.mdx +100 -0
- package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
- package/.docs/raw/reference/workflows/step-function.mdx +92 -0
- package/.docs/raw/reference/workflows/step-options.mdx +69 -0
- package/.docs/raw/reference/workflows/suspend.mdx +80 -0
- package/.docs/raw/reference/workflows/then.mdx +74 -0
- package/.docs/raw/reference/workflows/until.mdx +165 -0
- package/.docs/raw/reference/workflows/watch.mdx +118 -0
- package/.docs/raw/reference/workflows/while.mdx +168 -0
- package/.docs/raw/reference/workflows/workflow.mdx +233 -0
- package/.docs/raw/workflows/00-overview.mdx +168 -0
- package/.docs/raw/workflows/control-flow.mdx +712 -0
- package/.docs/raw/workflows/dynamic-workflows.mdx +232 -0
- package/.docs/raw/workflows/steps.mdx +98 -0
- package/.docs/raw/workflows/suspend-and-resume.mdx +196 -0
- package/.docs/raw/workflows/variables.mdx +248 -0
- package/LICENSE +44 -0
- package/README.md +129 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +19 -0
- package/dist/prepare-docs/code-examples.d.ts +4 -0
- package/dist/prepare-docs/code-examples.js +91 -0
- package/dist/prepare-docs/copy-raw.d.ts +1 -0
- package/dist/prepare-docs/copy-raw.js +41 -0
- package/dist/prepare-docs/index.d.ts +1 -0
- package/dist/prepare-docs/index.js +8 -0
- package/dist/prepare-docs/package-changes.d.ts +4 -0
- package/dist/prepare-docs/package-changes.js +92 -0
- package/dist/prepare-docs/prepare.d.ts +1 -0
- package/dist/prepare-docs/prepare.js +13 -0
- package/dist/sse.d.ts +1 -0
- package/dist/sse.js +9 -0
- package/dist/stdio.d.ts +1 -0
- package/dist/stdio.js +8 -0
- package/dist/tools/__tests__/blog.test.d.ts +1 -0
- package/dist/tools/__tests__/blog.test.js +48 -0
- package/dist/tools/__tests__/changes.test.d.ts +1 -0
- package/dist/tools/__tests__/changes.test.js +36 -0
- package/dist/tools/__tests__/docs.test.d.ts +1 -0
- package/dist/tools/__tests__/docs.test.js +46 -0
- package/dist/tools/__tests__/examples.test.d.ts +1 -0
- package/dist/tools/__tests__/examples.test.js +52 -0
- package/dist/tools/blog.d.ts +15 -0
- package/dist/tools/blog.js +73 -0
- package/dist/tools/changes.d.ts +11 -0
- package/dist/tools/changes.js +69 -0
- package/dist/tools/docs.d.ts +11 -0
- package/dist/tools/docs.js +176 -0
- package/dist/tools/examples.d.ts +11 -0
- package/dist/tools/examples.js +61 -0
- package/dist/utils.d.ts +6 -0
- package/dist/utils.js +9 -0
- package/package.json +66 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Summarization | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Summarization Metric in Mastra, which evaluates the quality of LLM-generated summaries for content and factual accuracy.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# SummarizationMetric
|
|
7
|
+
|
|
8
|
+
The `SummarizationMetric` evaluates how well an LLM's summary captures the original text's content while maintaining factual accuracy. It combines two aspects: alignment (factual correctness) and coverage (inclusion of key information), using the minimum scores to ensure both qualities are necessary for a good summary.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { openai } from "@ai-sdk/openai";
|
|
14
|
+
import { SummarizationMetric } from "@mastra/evals/llm";
|
|
15
|
+
|
|
16
|
+
// Configure the model for evaluation
|
|
17
|
+
const model = openai("gpt-4o-mini");
|
|
18
|
+
|
|
19
|
+
const metric = new SummarizationMetric(model);
|
|
20
|
+
|
|
21
|
+
const result = await metric.measure(
|
|
22
|
+
"The company was founded in 1995 by John Smith. It started with 10 employees and grew to 500 by 2020. The company is based in Seattle.",
|
|
23
|
+
"Founded in 1995 by John Smith, the company grew from 10 to 500 employees by 2020.",
|
|
24
|
+
);
|
|
25
|
+
|
|
26
|
+
console.log(result.score); // Score from 0-1
|
|
27
|
+
console.log(result.info); // Object containing detailed metrics about the summary
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Constructor Parameters
|
|
31
|
+
|
|
32
|
+
<PropertiesTable
|
|
33
|
+
content={[
|
|
34
|
+
{
|
|
35
|
+
name: "model",
|
|
36
|
+
type: "LanguageModel",
|
|
37
|
+
description: "Configuration for the model used to evaluate summaries",
|
|
38
|
+
isOptional: false,
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "options",
|
|
42
|
+
type: "SummarizationMetricOptions",
|
|
43
|
+
description: "Configuration options for the metric",
|
|
44
|
+
isOptional: true,
|
|
45
|
+
defaultValue: "{ scale: 1 }",
|
|
46
|
+
},
|
|
47
|
+
]}
|
|
48
|
+
/>
|
|
49
|
+
|
|
50
|
+
### SummarizationMetricOptions
|
|
51
|
+
|
|
52
|
+
<PropertiesTable
|
|
53
|
+
content={[
|
|
54
|
+
{
|
|
55
|
+
name: "scale",
|
|
56
|
+
type: "number",
|
|
57
|
+
description: "Maximum score value",
|
|
58
|
+
isOptional: true,
|
|
59
|
+
defaultValue: "1",
|
|
60
|
+
},
|
|
61
|
+
]}
|
|
62
|
+
/>
|
|
63
|
+
|
|
64
|
+
## measure() Parameters
|
|
65
|
+
|
|
66
|
+
<PropertiesTable
|
|
67
|
+
content={[
|
|
68
|
+
{
|
|
69
|
+
name: "input",
|
|
70
|
+
type: "string",
|
|
71
|
+
description: "The original text to be summarized",
|
|
72
|
+
isOptional: false,
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
name: "output",
|
|
76
|
+
type: "string",
|
|
77
|
+
description: "The generated summary to evaluate",
|
|
78
|
+
isOptional: false,
|
|
79
|
+
},
|
|
80
|
+
]}
|
|
81
|
+
/>
|
|
82
|
+
|
|
83
|
+
## Returns
|
|
84
|
+
|
|
85
|
+
<PropertiesTable
|
|
86
|
+
content={[
|
|
87
|
+
{
|
|
88
|
+
name: "score",
|
|
89
|
+
type: "number",
|
|
90
|
+
description: "Summarization score (0 to scale, default 0-1)",
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
name: "info",
|
|
94
|
+
type: "object",
|
|
95
|
+
description: "Object containing detailed metrics about the summary",
|
|
96
|
+
properties: [
|
|
97
|
+
{
|
|
98
|
+
type: "string",
|
|
99
|
+
parameters: [
|
|
100
|
+
{
|
|
101
|
+
name: "reason",
|
|
102
|
+
type: "string",
|
|
103
|
+
description:
|
|
104
|
+
"Detailed explanation of the score, including both alignment and coverage aspects",
|
|
105
|
+
},
|
|
106
|
+
],
|
|
107
|
+
},
|
|
108
|
+
{
|
|
109
|
+
type: "number",
|
|
110
|
+
parameters: [
|
|
111
|
+
{
|
|
112
|
+
name: "alignmentScore",
|
|
113
|
+
type: "number",
|
|
114
|
+
description: "Alignment score (0 to 1)",
|
|
115
|
+
},
|
|
116
|
+
],
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
type: "number",
|
|
120
|
+
parameters: [
|
|
121
|
+
{
|
|
122
|
+
name: "coverageScore",
|
|
123
|
+
type: "number",
|
|
124
|
+
description: "Coverage score (0 to 1)",
|
|
125
|
+
},
|
|
126
|
+
],
|
|
127
|
+
},
|
|
128
|
+
],
|
|
129
|
+
},
|
|
130
|
+
]}
|
|
131
|
+
/>
|
|
132
|
+
|
|
133
|
+
## Scoring Details
|
|
134
|
+
|
|
135
|
+
The metric evaluates summaries through two essential components:
|
|
136
|
+
1. **Alignment Score**: Measures factual correctness
|
|
137
|
+
- Extracts claims from the summary
|
|
138
|
+
- Verifies each claim against the original text
|
|
139
|
+
- Assigns "yes", "no", or "unsure" verdicts
|
|
140
|
+
|
|
141
|
+
2. **Coverage Score**: Measures inclusion of key information
|
|
142
|
+
- Generates key questions from the original text
|
|
143
|
+
- Checks if the summary answers these questions
|
|
144
|
+
- Checks information inclusion and assesses comprehensiveness
|
|
145
|
+
|
|
146
|
+
### Scoring Process
|
|
147
|
+
|
|
148
|
+
1. Calculates alignment score:
|
|
149
|
+
- Extracts claims from summary
|
|
150
|
+
- Verifies against source text
|
|
151
|
+
- Computes: `supported_claims / total_claims`
|
|
152
|
+
|
|
153
|
+
2. Determines coverage score:
|
|
154
|
+
- Generates questions from source
|
|
155
|
+
- Checks summary for answers
|
|
156
|
+
- Evaluates completeness
|
|
157
|
+
- Calculates: `answerable_questions / total_questions`
|
|
158
|
+
|
|
159
|
+
Final score: `min(alignment_score, coverage_score) * scale`
|
|
160
|
+
|
|
161
|
+
### Score interpretation
|
|
162
|
+
(0 to scale, default 0-1)
|
|
163
|
+
- 1.0: Perfect summary - completely factual and covers all key information
|
|
164
|
+
- 0.7-0.9: Strong summary with minor omissions or slight inaccuracies
|
|
165
|
+
- 0.4-0.6: Moderate quality with significant gaps or inaccuracies
|
|
166
|
+
- 0.1-0.3: Poor summary with major omissions or factual errors
|
|
167
|
+
- 0.0: Invalid summary - either completely inaccurate or missing critical information
|
|
168
|
+
|
|
169
|
+
## Example with Analysis
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
import { openai } from "@ai-sdk/openai";
|
|
173
|
+
import { SummarizationMetric } from "@mastra/evals/llm";
|
|
174
|
+
|
|
175
|
+
// Configure the model for evaluation
|
|
176
|
+
const model = openai("gpt-4o-mini");
|
|
177
|
+
|
|
178
|
+
const metric = new SummarizationMetric(model);
|
|
179
|
+
|
|
180
|
+
const result = await metric.measure(
|
|
181
|
+
"The electric car company Tesla was founded in 2003 by Martin Eberhard and Marc Tarpenning. Elon Musk joined in 2004 as the largest investor and became CEO in 2008. The company's first car, the Roadster, was launched in 2008.",
|
|
182
|
+
"Tesla, founded by Elon Musk in 2003, revolutionized the electric car industry starting with the Roadster in 2008.",
|
|
183
|
+
);
|
|
184
|
+
|
|
185
|
+
// Example output:
|
|
186
|
+
// {
|
|
187
|
+
// score: 0.5,
|
|
188
|
+
// info: {
|
|
189
|
+
// reason: "The score is 0.5 because while the coverage is good (0.75) - mentioning the founding year,
|
|
190
|
+
// first car model, and launch date - the alignment score is lower (0.5) due to incorrectly
|
|
191
|
+
// attributing the company's founding to Elon Musk instead of Martin Eberhard and Marc Tarpenning.
|
|
192
|
+
// The final score takes the minimum of these two scores to ensure both factual accuracy and
|
|
193
|
+
//   coverage are necessary for a good summary.",
|
|
194
|
+
// alignmentScore: 0.5,
|
|
195
|
+
// coverageScore: 0.75,
|
|
196
|
+
// }
|
|
197
|
+
// }
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Related
|
|
201
|
+
|
|
202
|
+
- [Faithfulness Metric](./faithfulness)
|
|
203
|
+
- [Completeness Metric](./completeness)
|
|
204
|
+
- [Contextual Recall Metric](./contextual-recall)
|
|
205
|
+
- [Hallucination Metric](./hallucination)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Textual Difference | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Textual Difference Metric in Mastra, which measures textual differences between strings using sequence matching.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# TextualDifferenceMetric
|
|
7
|
+
|
|
8
|
+
The `TextualDifferenceMetric` class uses sequence matching to measure the textual differences between two strings. It provides detailed information about changes, including the number of operations needed to transform one text into another.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { TextualDifferenceMetric } from "@mastra/evals/nlp";
|
|
14
|
+
|
|
15
|
+
const metric = new TextualDifferenceMetric();
|
|
16
|
+
|
|
17
|
+
const result = await metric.measure(
|
|
18
|
+
"The quick brown fox",
|
|
19
|
+
"The fast brown fox"
|
|
20
|
+
);
|
|
21
|
+
|
|
22
|
+
console.log(result.score); // Similarity ratio from 0-1
|
|
23
|
+
console.log(result.info); // Detailed change metrics
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## measure() Parameters
|
|
27
|
+
|
|
28
|
+
<PropertiesTable
|
|
29
|
+
content={[
|
|
30
|
+
{
|
|
31
|
+
name: "input",
|
|
32
|
+
type: "string",
|
|
33
|
+
description: "The original text to compare against",
|
|
34
|
+
isOptional: false,
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
name: "output",
|
|
38
|
+
type: "string",
|
|
39
|
+
description: "The text to evaluate for differences",
|
|
40
|
+
isOptional: false,
|
|
41
|
+
}
|
|
42
|
+
]}
|
|
43
|
+
/>
|
|
44
|
+
|
|
45
|
+
## Returns
|
|
46
|
+
|
|
47
|
+
<PropertiesTable
|
|
48
|
+
content={[
|
|
49
|
+
{
|
|
50
|
+
name: "score",
|
|
51
|
+
type: "number",
|
|
52
|
+
description: "Similarity ratio (0-1) where 1 indicates identical texts",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
name: "info",
|
|
56
|
+
description: "Detailed metrics about the differences",
|
|
57
|
+
properties: [
|
|
58
|
+
{
|
|
59
|
+
type: "number",
|
|
60
|
+
parameters: [
|
|
61
|
+
{
|
|
62
|
+
name: "confidence",
|
|
63
|
+
type: "number",
|
|
64
|
+
description: "Confidence score based on length difference between texts (0-1)",
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
type: "number",
|
|
70
|
+
parameters: [
|
|
71
|
+
{
|
|
72
|
+
name: "ratio",
|
|
73
|
+
type: "number",
|
|
74
|
+
description: "Raw similarity ratio between the texts",
|
|
75
|
+
}
|
|
76
|
+
]
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
type: "number",
|
|
80
|
+
parameters: [
|
|
81
|
+
{
|
|
82
|
+
name: "changes",
|
|
83
|
+
type: "number",
|
|
84
|
+
description: "Number of change operations (insertions, deletions, replacements)",
|
|
85
|
+
}
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
type: "number",
|
|
90
|
+
parameters: [
|
|
91
|
+
{
|
|
92
|
+
name: "lengthDiff",
|
|
93
|
+
type: "number",
|
|
94
|
+
description: "Normalized difference in length between input and output (0-1)",
|
|
95
|
+
}
|
|
96
|
+
]
|
|
97
|
+
},
|
|
98
|
+
]
|
|
99
|
+
},
|
|
100
|
+
]}
|
|
101
|
+
/>
|
|
102
|
+
|
|
103
|
+
## Scoring Details
|
|
104
|
+
|
|
105
|
+
The metric calculates several measures:
|
|
106
|
+
- **Similarity Ratio**: Based on sequence matching between texts (0-1)
|
|
107
|
+
- **Changes**: Count of non-matching operations needed
|
|
108
|
+
- **Length Difference**: Normalized difference in text lengths
|
|
109
|
+
- **Confidence**: Inversely proportional to length difference
|
|
110
|
+
|
|
111
|
+
### Scoring Process
|
|
112
|
+
|
|
113
|
+
1. Analyzes textual differences:
|
|
114
|
+
- Performs sequence matching between input and output
|
|
115
|
+
- Counts the number of change operations required
|
|
116
|
+
- Measures length differences
|
|
117
|
+
|
|
118
|
+
2. Calculates metrics:
|
|
119
|
+
- Computes similarity ratio
|
|
120
|
+
- Determines confidence score
|
|
121
|
+
- Combines into weighted score
|
|
122
|
+
|
|
123
|
+
Final score: `(similarity_ratio * confidence) * scale`
|
|
124
|
+
|
|
125
|
+
### Score interpretation
|
|
126
|
+
(0 to scale, default 0-1)
|
|
127
|
+
- 1.0: Identical texts - no differences
|
|
128
|
+
- 0.7-0.9: Minor differences - few changes needed
|
|
129
|
+
- 0.4-0.6: Moderate differences - significant changes
|
|
130
|
+
- 0.1-0.3: Major differences - extensive changes
|
|
131
|
+
- 0.0: Completely different texts
|
|
132
|
+
|
|
133
|
+
## Example with Analysis
|
|
134
|
+
|
|
135
|
+
```typescript
|
|
136
|
+
import { TextualDifferenceMetric } from "@mastra/evals/nlp";
|
|
137
|
+
|
|
138
|
+
const metric = new TextualDifferenceMetric();
|
|
139
|
+
|
|
140
|
+
const result = await metric.measure(
|
|
141
|
+
"Hello world! How are you?",
|
|
142
|
+
"Hello there! How is it going?"
|
|
143
|
+
);
|
|
144
|
+
|
|
145
|
+
// Example output:
|
|
146
|
+
// {
|
|
147
|
+
// score: 0.65,
|
|
148
|
+
// info: {
|
|
149
|
+
// confidence: 0.95,
|
|
150
|
+
// ratio: 0.65,
|
|
151
|
+
// changes: 2,
|
|
152
|
+
// lengthDiff: 0.05
|
|
153
|
+
// }
|
|
154
|
+
// }
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Related
|
|
158
|
+
|
|
159
|
+
- [Content Similarity Metric](./content-similarity)
|
|
160
|
+
- [Completeness Metric](./completeness)
|
|
161
|
+
- [Keyword Coverage Metric](./keyword-coverage)
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Tone Consistency | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Tone Consistency Metric in Mastra, which evaluates emotional tone and sentiment consistency in text.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ToneConsistencyMetric
|
|
7
|
+
|
|
8
|
+
The `ToneConsistencyMetric` class evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { ToneConsistencyMetric } from "@mastra/evals/nlp";
|
|
14
|
+
|
|
15
|
+
const metric = new ToneConsistencyMetric();
|
|
16
|
+
|
|
17
|
+
// Compare tone between input and output
|
|
18
|
+
const result1 = await metric.measure(
|
|
19
|
+
"I love this amazing product!",
|
|
20
|
+
"This product is wonderful and fantastic!"
|
|
21
|
+
);
|
|
22
|
+
|
|
23
|
+
// Analyze tone stability in a single text
|
|
24
|
+
const result2 = await metric.measure(
|
|
25
|
+
"The service is excellent. The staff is friendly. The atmosphere is perfect.",
|
|
26
|
+
"" // Empty string for single-text analysis
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
console.log(result1.score); // Tone consistency score from 0-1
|
|
30
|
+
console.log(result2.score); // Tone stability score from 0-1
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## measure() Parameters
|
|
34
|
+
|
|
35
|
+
<PropertiesTable
|
|
36
|
+
content={[
|
|
37
|
+
{
|
|
38
|
+
name: "input",
|
|
39
|
+
type: "string",
|
|
40
|
+
description: "The text to analyze for tone",
|
|
41
|
+
isOptional: false,
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
name: "output",
|
|
45
|
+
type: "string",
|
|
46
|
+
description: "Reference text for tone comparison (empty string for stability analysis)",
|
|
47
|
+
isOptional: false,
|
|
48
|
+
}
|
|
49
|
+
]}
|
|
50
|
+
/>
|
|
51
|
+
|
|
52
|
+
## Returns
|
|
53
|
+
|
|
54
|
+
<PropertiesTable
|
|
55
|
+
content={[
|
|
56
|
+
{
|
|
57
|
+
name: "score",
|
|
58
|
+
type: "number",
|
|
59
|
+
description: "Tone consistency/stability score (0-1)",
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
name: "info",
|
|
63
|
+
type: "object",
|
|
64
|
+
description: "Detailed tone info",
|
|
65
|
+
}
|
|
66
|
+
]}
|
|
67
|
+
/>
|
|
68
|
+
|
|
69
|
+
### info Object (Tone Comparison)
|
|
70
|
+
|
|
71
|
+
<PropertiesTable
|
|
72
|
+
content={[
|
|
73
|
+
{
|
|
74
|
+
name: "responseSentiment",
|
|
75
|
+
type: "number",
|
|
76
|
+
description: "Sentiment score for the input text",
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
name: "referenceSentiment",
|
|
80
|
+
type: "number",
|
|
81
|
+
description: "Sentiment score for the output text",
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
name: "difference",
|
|
85
|
+
type: "number",
|
|
86
|
+
description: "Absolute difference between sentiment scores",
|
|
87
|
+
}
|
|
88
|
+
]}
|
|
89
|
+
/>
|
|
90
|
+
|
|
91
|
+
### info Object (Tone Stability)
|
|
92
|
+
|
|
93
|
+
<PropertiesTable
|
|
94
|
+
content={[
|
|
95
|
+
{
|
|
96
|
+
name: "avgSentiment",
|
|
97
|
+
type: "number",
|
|
98
|
+
description: "Average sentiment score across sentences",
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
name: "sentimentVariance",
|
|
102
|
+
type: "number",
|
|
103
|
+
description: "Variance in sentiment between sentences",
|
|
104
|
+
}
|
|
105
|
+
]}
|
|
106
|
+
/>
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
## Scoring Details
|
|
110
|
+
|
|
111
|
+
The metric evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
|
|
112
|
+
|
|
113
|
+
### Scoring Process
|
|
114
|
+
|
|
115
|
+
1. Analyzes tone patterns:
|
|
116
|
+
- Extracts sentiment features
|
|
117
|
+
- Computes sentiment scores
|
|
118
|
+
- Measures tone variations
|
|
119
|
+
|
|
120
|
+
2. Calculates mode-specific score:
|
|
121
|
+
**Tone Consistency** (input and output):
|
|
122
|
+
- Compares sentiment between texts
|
|
123
|
+
- Calculates sentiment difference
|
|
124
|
+
- Score = 1 - (sentiment_difference / max_difference)
|
|
125
|
+
|
|
126
|
+
**Tone Stability** (single input):
|
|
127
|
+
- Analyzes sentiment across sentences
|
|
128
|
+
- Calculates sentiment variance
|
|
129
|
+
- Score = 1 - (sentiment_variance / max_variance)
|
|
130
|
+
|
|
131
|
+
Final score: `mode_specific_score * scale`
|
|
132
|
+
|
|
133
|
+
### Score interpretation
|
|
134
|
+
(0 to scale, default 0-1)
|
|
135
|
+
- 1.0: Perfect tone consistency/stability
|
|
136
|
+
- 0.7-0.9: Strong consistency with minor variations
|
|
137
|
+
- 0.4-0.6: Moderate consistency with noticeable shifts
|
|
138
|
+
- 0.1-0.3: Poor consistency with major tone changes
|
|
139
|
+
- 0.0: No consistency - completely different tones
|
|
140
|
+
|
|
141
|
+
## Example with Both Modes
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
import { ToneConsistencyMetric } from "@mastra/evals/nlp";
|
|
145
|
+
|
|
146
|
+
const metric = new ToneConsistencyMetric();
|
|
147
|
+
|
|
148
|
+
// Tone Consistency Mode
|
|
149
|
+
const consistencyResult = await metric.measure(
|
|
150
|
+
"This product is fantastic and amazing!",
|
|
151
|
+
"The product is excellent and wonderful!"
|
|
152
|
+
);
|
|
153
|
+
// Example output:
|
|
154
|
+
// {
|
|
155
|
+
// score: 0.95,
|
|
156
|
+
// info: {
|
|
157
|
+
// responseSentiment: 0.8,
|
|
158
|
+
// referenceSentiment: 0.75,
|
|
159
|
+
// difference: 0.05
|
|
160
|
+
// }
|
|
161
|
+
// }
|
|
162
|
+
|
|
163
|
+
// Tone Stability Mode
|
|
164
|
+
const stabilityResult = await metric.measure(
|
|
165
|
+
"Great service! Friendly staff. Perfect atmosphere.",
|
|
166
|
+
"" // empty second argument switches measure() to single-input Tone Stability mode
|
|
167
|
+
);
|
|
168
|
+
// Example output:
|
|
169
|
+
// {
|
|
170
|
+
// score: 0.9,
|
|
171
|
+
// info: {
|
|
172
|
+
// avgSentiment: 0.6,
|
|
173
|
+
// sentimentVariance: 0.1
|
|
174
|
+
// }
|
|
175
|
+
// }
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Related
|
|
179
|
+
|
|
180
|
+
- [Content Similarity Metric](./content-similarity)
|
|
181
|
+
- [Toxicity Metric](./toxicity)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Toxicity | Metrics | Evals | Mastra Docs"
|
|
3
|
+
description: Documentation for the Toxicity Metric in Mastra, which evaluates LLM outputs for racist, biased, or toxic elements.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ToxicityMetric
|
|
7
|
+
|
|
8
|
+
The `ToxicityMetric` class evaluates whether an LLM's output contains racist, biased, or toxic elements. It uses a judge-based system to analyze responses for various forms of toxicity including personal attacks, mockery, hate speech, dismissive statements, and threats.
|
|
9
|
+
|
|
10
|
+
## Basic Usage
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { openai } from "@ai-sdk/openai";
|
|
14
|
+
import { ToxicityMetric } from "@mastra/evals/llm";
|
|
15
|
+
|
|
16
|
+
// Configure the model for evaluation
|
|
17
|
+
const model = openai("gpt-4o-mini");
|
|
18
|
+
|
|
19
|
+
const metric = new ToxicityMetric(model, {
|
|
20
|
+
scale: 1, // Default scale is 0-1
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
const result = await metric.measure(
|
|
24
|
+
"How is Sarah as a person?",
|
|
25
|
+
"Sarah is a dedicated team member who always delivers quality work.",
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
console.log(result.score); // Score from 0-1 (0 = not toxic, 1 = toxic)
|
|
29
|
+
console.log(result.info.reason); // Explanation of the toxicity assessment
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Constructor Parameters
|
|
33
|
+
|
|
34
|
+
<PropertiesTable
|
|
35
|
+
content={[
|
|
36
|
+
{
|
|
37
|
+
name: "model",
|
|
38
|
+
type: "LanguageModel",
|
|
39
|
+
description: "Configuration for the model used to evaluate toxicity",
|
|
40
|
+
isOptional: false,
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
name: "options",
|
|
44
|
+
type: "ToxicityMetricOptions",
|
|
45
|
+
description: "Configuration options for the metric",
|
|
46
|
+
isOptional: true,
|
|
47
|
+
defaultValue: "{ scale: 1 }",
|
|
48
|
+
},
|
|
49
|
+
]}
|
|
50
|
+
/>
|
|
51
|
+
|
|
52
|
+
### ToxicityMetricOptions
|
|
53
|
+
|
|
54
|
+
<PropertiesTable
|
|
55
|
+
content={[
|
|
56
|
+
{
|
|
57
|
+
name: "scale",
|
|
58
|
+
type: "number",
|
|
59
|
+
description: "Maximum score value (default is 1)",
|
|
60
|
+
isOptional: true,
|
|
61
|
+
defaultValue: "1",
|
|
62
|
+
},
|
|
63
|
+
]}
|
|
64
|
+
/>
|
|
65
|
+
|
|
66
|
+
## measure() Parameters
|
|
67
|
+
|
|
68
|
+
<PropertiesTable
|
|
69
|
+
content={[
|
|
70
|
+
{
|
|
71
|
+
name: "input",
|
|
72
|
+
type: "string",
|
|
73
|
+
description: "The original query or prompt",
|
|
74
|
+
isOptional: false,
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
name: "output",
|
|
78
|
+
type: "string",
|
|
79
|
+
description: "The LLM's response to evaluate",
|
|
80
|
+
isOptional: false,
|
|
81
|
+
},
|
|
82
|
+
]}
|
|
83
|
+
/>
|
|
84
|
+
|
|
85
|
+
## Returns
|
|
86
|
+
|
|
87
|
+
<PropertiesTable
|
|
88
|
+
content={[
|
|
89
|
+
{
|
|
90
|
+
name: "score",
|
|
91
|
+
type: "number",
|
|
92
|
+
description: "Toxicity score (0 to scale, default 0-1)",
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
name: "info",
|
|
96
|
+
type: "object",
|
|
97
|
+
description: "Detailed toxicity info",
|
|
98
|
+
properties: [
|
|
99
|
+
{
|
|
100
|
+
type: "string",
|
|
101
|
+
parameters: [
|
|
102
|
+
{
|
|
103
|
+
name: "reason",
|
|
104
|
+
type: "string",
|
|
105
|
+
description: "Detailed explanation of the toxicity assessment",
|
|
106
|
+
},
|
|
107
|
+
],
|
|
108
|
+
},
|
|
109
|
+
],
|
|
110
|
+
},
|
|
111
|
+
]}
|
|
112
|
+
/>
|
|
113
|
+
|
|
114
|
+
## Scoring Details
|
|
115
|
+
|
|
116
|
+
The metric evaluates toxicity through multiple aspects:
|
|
117
|
+
- Personal attacks
|
|
118
|
+
- Mockery or sarcasm
|
|
119
|
+
- Hate speech
|
|
120
|
+
- Dismissive statements
|
|
121
|
+
- Threats or intimidation
|
|
122
|
+
|
|
123
|
+
### Scoring Process
|
|
124
|
+
|
|
125
|
+
1. Analyzes toxic elements:
|
|
126
|
+
- Identifies personal attacks and mockery
|
|
127
|
+
- Detects hate speech and threats
|
|
128
|
+
- Evaluates dismissive statements
|
|
129
|
+
- Assesses severity levels
|
|
130
|
+
|
|
131
|
+
2. Calculates toxicity score:
|
|
132
|
+
- Weighs detected elements
|
|
133
|
+
- Combines severity ratings
|
|
134
|
+
- Normalizes to scale
|
|
135
|
+
|
|
136
|
+
Final score: `(toxicity_weighted_sum / max_toxicity) * scale`
|
|
137
|
+
|
|
138
|
+
### Score interpretation
|
|
139
|
+
(0 to scale, default 0-1)
|
|
140
|
+
- 0.8-1.0: Severe toxicity
|
|
141
|
+
- 0.4-0.7: Moderate toxicity
|
|
142
|
+
- 0.1-0.3: Mild toxicity
|
|
143
|
+
- 0.0: No toxic elements detected
|
|
144
|
+
|
|
145
|
+
## Example with Custom Configuration
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
import { openai } from "@ai-sdk/openai";
|
|
149
|
+
|
|
150
|
+
const model = openai("gpt-4o-mini");
|
|
151
|
+
|
|
152
|
+
const metric = new ToxicityMetric(model, {
|
|
153
|
+
scale: 10, // Use 0-10 scale instead of 0-1
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
const result = await metric.measure(
|
|
157
|
+
"What do you think about the new team member?",
|
|
158
|
+
"The new team member shows promise but needs significant improvement in basic skills.",
|
|
159
|
+
);
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Related
|
|
163
|
+
|
|
164
|
+
- [Tone Consistency Metric](./tone-consistency)
|
|
165
|
+
- [Bias Metric](./bias)
|