@mastra/mcp-docs-server 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
  2. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
  4. package/.docs/organized/changelogs/%40mastra%2Fcomposio.md +302 -0
  5. package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
  8. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
  9. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
  10. package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
  11. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
  12. package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
  13. package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
  14. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +11 -0
  15. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
  16. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
  17. package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
  18. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
  19. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
  20. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
  21. package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
  22. package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
  23. package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
  24. package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
  25. package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
  26. package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
  27. package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
  28. package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
  29. package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
  30. package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
  31. package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
  32. package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
  33. package/.docs/organized/changelogs/%40mastra%2Fstabilityai.md +302 -0
  34. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +59 -0
  35. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
  36. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
  37. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
  38. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
  39. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
  40. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
  41. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +24 -0
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +12 -0
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
  46. package/.docs/organized/changelogs/create-mastra.md +302 -0
  47. package/.docs/organized/changelogs/mastra.md +302 -0
  48. package/.docs/organized/code-examples/agent.md +385 -0
  49. package/.docs/organized/code-examples/ai-sdk-useChat.md +377 -0
  50. package/.docs/organized/code-examples/assistant-ui.md +37 -0
  51. package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
  52. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
  53. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
  54. package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
  55. package/.docs/organized/code-examples/fireworks-r1.md +159 -0
  56. package/.docs/organized/code-examples/integrations.md +184 -0
  57. package/.docs/organized/code-examples/mcp-configuration.md +341 -0
  58. package/.docs/organized/code-examples/memory-todo-agent.md +161 -0
  59. package/.docs/organized/code-examples/memory-with-context.md +167 -0
  60. package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
  61. package/.docs/organized/code-examples/memory-with-pg.md +224 -0
  62. package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
  63. package/.docs/organized/code-examples/quick-start.md +127 -0
  64. package/.docs/organized/code-examples/stock-price-tool.md +124 -0
  65. package/.docs/organized/code-examples/weather-agent.md +352 -0
  66. package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
  67. package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
  68. package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
  69. package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
  70. package/.docs/raw/agents/00-overview.mdx +185 -0
  71. package/.docs/raw/agents/01-agent-memory.mdx +610 -0
  72. package/.docs/raw/agents/02-adding-tools.mdx +224 -0
  73. package/.docs/raw/agents/03-adding-voice.mdx +170 -0
  74. package/.docs/raw/deployment/deployment.mdx +156 -0
  75. package/.docs/raw/deployment/logging-and-tracing.mdx +242 -0
  76. package/.docs/raw/deployment/server.mdx +114 -0
  77. package/.docs/raw/evals/00-overview.mdx +106 -0
  78. package/.docs/raw/evals/01-supported-evals.mdx +31 -0
  79. package/.docs/raw/evals/02-custom-eval.mdx +187 -0
  80. package/.docs/raw/faq/index.mdx +63 -0
  81. package/.docs/raw/frameworks/01-next-js.mdx +238 -0
  82. package/.docs/raw/frameworks/02-ai-sdk.mdx +218 -0
  83. package/.docs/raw/getting-started/installation.mdx +436 -0
  84. package/.docs/raw/getting-started/project-structure.mdx +80 -0
  85. package/.docs/raw/guides/01-chef-michel.mdx +242 -0
  86. package/.docs/raw/guides/02-stock-agent.mdx +182 -0
  87. package/.docs/raw/guides/03-recruiter.mdx +187 -0
  88. package/.docs/raw/index.mdx +22 -0
  89. package/.docs/raw/local-dev/creating-projects.mdx +74 -0
  90. package/.docs/raw/local-dev/integrations.mdx +127 -0
  91. package/.docs/raw/local-dev/mastra-dev.mdx +65 -0
  92. package/.docs/raw/rag/chunking-and-embedding.mdx +128 -0
  93. package/.docs/raw/rag/overview.mdx +85 -0
  94. package/.docs/raw/rag/retrieval.mdx +362 -0
  95. package/.docs/raw/rag/vector-databases.mdx +271 -0
  96. package/.docs/raw/reference/agents/createTool.mdx +190 -0
  97. package/.docs/raw/reference/agents/generate.mdx +327 -0
  98. package/.docs/raw/reference/agents/getAgent.mdx +54 -0
  99. package/.docs/raw/reference/agents/stream.mdx +361 -0
  100. package/.docs/raw/reference/cli/build.mdx +48 -0
  101. package/.docs/raw/reference/cli/deploy.mdx +22 -0
  102. package/.docs/raw/reference/cli/dev.mdx +97 -0
  103. package/.docs/raw/reference/cli/init.mdx +43 -0
  104. package/.docs/raw/reference/client-js/agents.mdx +90 -0
  105. package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
  106. package/.docs/raw/reference/client-js/index.mdx +127 -0
  107. package/.docs/raw/reference/client-js/logs.mdx +24 -0
  108. package/.docs/raw/reference/client-js/memory.mdx +94 -0
  109. package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
  110. package/.docs/raw/reference/client-js/tools.mdx +44 -0
  111. package/.docs/raw/reference/client-js/vectors.mdx +79 -0
  112. package/.docs/raw/reference/client-js/workflows.mdx +137 -0
  113. package/.docs/raw/reference/core/mastra-class.mdx +232 -0
  114. package/.docs/raw/reference/deployer/cloudflare.mdx +176 -0
  115. package/.docs/raw/reference/deployer/deployer.mdx +159 -0
  116. package/.docs/raw/reference/deployer/netlify.mdx +88 -0
  117. package/.docs/raw/reference/deployer/vercel.mdx +97 -0
  118. package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
  119. package/.docs/raw/reference/evals/bias.mdx +186 -0
  120. package/.docs/raw/reference/evals/completeness.mdx +174 -0
  121. package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
  122. package/.docs/raw/reference/evals/context-position.mdx +190 -0
  123. package/.docs/raw/reference/evals/context-precision.mdx +189 -0
  124. package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
  125. package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
  126. package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
  127. package/.docs/raw/reference/evals/hallucination.mdx +219 -0
  128. package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
  129. package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
  130. package/.docs/raw/reference/evals/summarization.mdx +205 -0
  131. package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
  132. package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
  133. package/.docs/raw/reference/evals/toxicity.mdx +165 -0
  134. package/.docs/raw/reference/index.mdx +8 -0
  135. package/.docs/raw/reference/memory/Memory.mdx +186 -0
  136. package/.docs/raw/reference/memory/createThread.mdx +93 -0
  137. package/.docs/raw/reference/memory/getThreadById.mdx +43 -0
  138. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +45 -0
  139. package/.docs/raw/reference/memory/query.mdx +164 -0
  140. package/.docs/raw/reference/observability/create-logger.mdx +106 -0
  141. package/.docs/raw/reference/observability/logger.mdx +55 -0
  142. package/.docs/raw/reference/observability/otel-config.mdx +120 -0
  143. package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
  144. package/.docs/raw/reference/observability/providers/index.mdx +15 -0
  145. package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
  146. package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
  147. package/.docs/raw/reference/observability/providers/langsmith.mdx +46 -0
  148. package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
  149. package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
  150. package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
  151. package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
  152. package/.docs/raw/reference/rag/astra.mdx +258 -0
  153. package/.docs/raw/reference/rag/chroma.mdx +281 -0
  154. package/.docs/raw/reference/rag/chunk.mdx +237 -0
  155. package/.docs/raw/reference/rag/document.mdx +129 -0
  156. package/.docs/raw/reference/rag/embeddings.mdx +160 -0
  157. package/.docs/raw/reference/rag/extract-params.mdx +72 -0
  158. package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
  159. package/.docs/raw/reference/rag/libsql.mdx +357 -0
  160. package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
  161. package/.docs/raw/reference/rag/pg.mdx +477 -0
  162. package/.docs/raw/reference/rag/pinecone.mdx +249 -0
  163. package/.docs/raw/reference/rag/qdrant.mdx +236 -0
  164. package/.docs/raw/reference/rag/rerank.mdx +212 -0
  165. package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
  166. package/.docs/raw/reference/rag/upstash.mdx +198 -0
  167. package/.docs/raw/reference/rag/vectorize.mdx +253 -0
  168. package/.docs/raw/reference/storage/libsql.mdx +74 -0
  169. package/.docs/raw/reference/storage/postgresql.mdx +48 -0
  170. package/.docs/raw/reference/storage/upstash.mdx +86 -0
  171. package/.docs/raw/reference/tools/client.mdx +180 -0
  172. package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
  173. package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
  174. package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
  175. package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
  176. package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
  177. package/.docs/raw/reference/voice/deepgram.mdx +164 -0
  178. package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
  179. package/.docs/raw/reference/voice/google.mdx +198 -0
  180. package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
  181. package/.docs/raw/reference/voice/murf.mdx +251 -0
  182. package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
  183. package/.docs/raw/reference/voice/openai.mdx +168 -0
  184. package/.docs/raw/reference/voice/playai.mdx +159 -0
  185. package/.docs/raw/reference/voice/speechify.mdx +145 -0
  186. package/.docs/raw/reference/workflows/after.mdx +88 -0
  187. package/.docs/raw/reference/workflows/commit.mdx +37 -0
  188. package/.docs/raw/reference/workflows/createRun.mdx +77 -0
  189. package/.docs/raw/reference/workflows/else.mdx +72 -0
  190. package/.docs/raw/reference/workflows/execute.mdx +110 -0
  191. package/.docs/raw/reference/workflows/if.mdx +107 -0
  192. package/.docs/raw/reference/workflows/resume.mdx +155 -0
  193. package/.docs/raw/reference/workflows/start.mdx +84 -0
  194. package/.docs/raw/reference/workflows/step-class.mdx +100 -0
  195. package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
  196. package/.docs/raw/reference/workflows/step-function.mdx +92 -0
  197. package/.docs/raw/reference/workflows/step-options.mdx +69 -0
  198. package/.docs/raw/reference/workflows/suspend.mdx +80 -0
  199. package/.docs/raw/reference/workflows/then.mdx +74 -0
  200. package/.docs/raw/reference/workflows/until.mdx +165 -0
  201. package/.docs/raw/reference/workflows/watch.mdx +118 -0
  202. package/.docs/raw/reference/workflows/while.mdx +168 -0
  203. package/.docs/raw/reference/workflows/workflow.mdx +233 -0
  204. package/.docs/raw/workflows/00-overview.mdx +168 -0
  205. package/.docs/raw/workflows/control-flow.mdx +712 -0
  206. package/.docs/raw/workflows/dynamic-workflows.mdx +232 -0
  207. package/.docs/raw/workflows/steps.mdx +98 -0
  208. package/.docs/raw/workflows/suspend-and-resume.mdx +196 -0
  209. package/.docs/raw/workflows/variables.mdx +248 -0
  210. package/LICENSE +44 -0
  211. package/README.md +129 -0
  212. package/dist/index.d.ts +3 -0
  213. package/dist/index.js +19 -0
  214. package/dist/prepare-docs/code-examples.d.ts +4 -0
  215. package/dist/prepare-docs/code-examples.js +91 -0
  216. package/dist/prepare-docs/copy-raw.d.ts +1 -0
  217. package/dist/prepare-docs/copy-raw.js +41 -0
  218. package/dist/prepare-docs/index.d.ts +1 -0
  219. package/dist/prepare-docs/index.js +8 -0
  220. package/dist/prepare-docs/package-changes.d.ts +4 -0
  221. package/dist/prepare-docs/package-changes.js +92 -0
  222. package/dist/prepare-docs/prepare.d.ts +1 -0
  223. package/dist/prepare-docs/prepare.js +13 -0
  224. package/dist/sse.d.ts +1 -0
  225. package/dist/sse.js +9 -0
  226. package/dist/stdio.d.ts +1 -0
  227. package/dist/stdio.js +8 -0
  228. package/dist/tools/__tests__/blog.test.d.ts +1 -0
  229. package/dist/tools/__tests__/blog.test.js +48 -0
  230. package/dist/tools/__tests__/changes.test.d.ts +1 -0
  231. package/dist/tools/__tests__/changes.test.js +36 -0
  232. package/dist/tools/__tests__/docs.test.d.ts +1 -0
  233. package/dist/tools/__tests__/docs.test.js +46 -0
  234. package/dist/tools/__tests__/examples.test.d.ts +1 -0
  235. package/dist/tools/__tests__/examples.test.js +52 -0
  236. package/dist/tools/blog.d.ts +15 -0
  237. package/dist/tools/blog.js +73 -0
  238. package/dist/tools/changes.d.ts +11 -0
  239. package/dist/tools/changes.js +69 -0
  240. package/dist/tools/docs.d.ts +11 -0
  241. package/dist/tools/docs.js +176 -0
  242. package/dist/tools/examples.d.ts +11 -0
  243. package/dist/tools/examples.js +61 -0
  244. package/dist/utils.d.ts +6 -0
  245. package/dist/utils.js +9 -0
  246. package/package.json +66 -0
@@ -0,0 +1,205 @@
1
+ ---
2
+ title: "Reference: Summarization | Metrics | Evals | Mastra Docs"
3
+ description: Documentation for the Summarization Metric in Mastra, which evaluates the quality of LLM-generated summaries for content and factual accuracy.
4
+ ---
5
+
6
+ # SummarizationMetric
7
+
8
+ The `SummarizationMetric` evaluates how well an LLM's summary captures the original text's content while maintaining factual accuracy. It combines two aspects: alignment (factual correctness) and coverage (inclusion of key information), using the minimum scores to ensure both qualities are necessary for a good summary.
9
+
10
+ ## Basic Usage
11
+
12
+ ```typescript
13
+ import { openai } from "@ai-sdk/openai";
14
+ import { SummarizationMetric } from "@mastra/evals/llm";
15
+
16
+ // Configure the model for evaluation
17
+ const model = openai("gpt-4o-mini");
18
+
19
+ const metric = new SummarizationMetric(model);
20
+
21
+ const result = await metric.measure(
22
+ "The company was founded in 1995 by John Smith. It started with 10 employees and grew to 500 by 2020. The company is based in Seattle.",
23
+ "Founded in 1995 by John Smith, the company grew from 10 to 500 employees by 2020.",
24
+ );
25
+
26
+ console.log(result.score); // Score from 0-1
27
+ console.log(result.info); // Object containing detailed metrics about the summary
28
+ ```
29
+
30
+ ## Constructor Parameters
31
+
32
+ <PropertiesTable
33
+ content={[
34
+ {
35
+ name: "model",
36
+ type: "LanguageModel",
37
+ description: "Configuration for the model used to evaluate summaries",
38
+ isOptional: false,
39
+ },
40
+ {
41
+ name: "options",
42
+ type: "SummarizationMetricOptions",
43
+ description: "Configuration options for the metric",
44
+ isOptional: true,
45
+ defaultValue: "{ scale: 1 }",
46
+ },
47
+ ]}
48
+ />
49
+
50
+ ### SummarizationMetricOptions
51
+
52
+ <PropertiesTable
53
+ content={[
54
+ {
55
+ name: "scale",
56
+ type: "number",
57
+ description: "Maximum score value",
58
+ isOptional: true,
59
+ defaultValue: "1",
60
+ },
61
+ ]}
62
+ />
63
+
64
+ ## measure() Parameters
65
+
66
+ <PropertiesTable
67
+ content={[
68
+ {
69
+ name: "input",
70
+ type: "string",
71
+ description: "The original text to be summarized",
72
+ isOptional: false,
73
+ },
74
+ {
75
+ name: "output",
76
+ type: "string",
77
+ description: "The generated summary to evaluate",
78
+ isOptional: false,
79
+ },
80
+ ]}
81
+ />
82
+
83
+ ## Returns
84
+
85
+ <PropertiesTable
86
+ content={[
87
+ {
88
+ name: "score",
89
+ type: "number",
90
+ description: "Summarization score (0 to scale, default 0-1)",
91
+ },
92
+ {
93
+ name: "info",
94
+ type: "object",
95
+ description: "Object containing detailed metrics about the summary",
96
+ properties: [
97
+ {
98
+ type: "string",
99
+ parameters: [
100
+ {
101
+ name: "reason",
102
+ type: "string",
103
+ description:
104
+ "Detailed explanation of the score, including both alignment and coverage aspects",
105
+ },
106
+ ],
107
+ },
108
+ {
109
+ type: "number",
110
+ parameters: [
111
+ {
112
+ name: "alignmentScore",
113
+ type: "number",
114
+ description: "Alignment score (0 to 1)",
115
+ },
116
+ ],
117
+ },
118
+ {
119
+ type: "number",
120
+ parameters: [
121
+ {
122
+ name: "coverageScore",
123
+ type: "number",
124
+ description: "Coverage score (0 to 1)",
125
+ },
126
+ ],
127
+ },
128
+ ],
129
+ },
130
+ ]}
131
+ />
132
+
133
+ ## Scoring Details
134
+
135
+ The metric evaluates summaries through two essential components:
136
+ 1. **Alignment Score**: Measures factual correctness
137
+ - Extracts claims from the summary
138
+ - Verifies each claim against the original text
139
+ - Assigns "yes", "no", or "unsure" verdicts
140
+
141
+ 2. **Coverage Score**: Measures inclusion of key information
142
+ - Generates key questions from the original text
143
+ - Checks if the summary answers these questions
144
+ - Checks information inclusion and assesses comprehensiveness
145
+
146
+ ### Scoring Process
147
+
148
+ 1. Calculates alignment score:
149
+ - Extracts claims from summary
150
+ - Verifies against source text
151
+ - Computes: `supported_claims / total_claims`
152
+
153
+ 2. Determines coverage score:
154
+ - Generates questions from source
155
+ - Checks summary for answers
156
+ - Evaluates completeness
157
+ - Calculates: `answerable_questions / total_questions`
158
+
159
+ Final score: `min(alignment_score, coverage_score) * scale`
160
+
161
+ ### Score interpretation
162
+ (0 to scale, default 0-1)
163
+ - 1.0: Perfect summary - completely factual and covers all key information
164
+ - 0.7-0.9: Strong summary with minor omissions or slight inaccuracies
165
+ - 0.4-0.6: Moderate quality with significant gaps or inaccuracies
166
+ - 0.1-0.3: Poor summary with major omissions or factual errors
167
+ - 0.0: Invalid summary - either completely inaccurate or missing critical information
168
+
169
+ ## Example with Analysis
170
+
171
+ ```typescript
172
+ import { openai } from "@ai-sdk/openai";
173
+ import { SummarizationMetric } from "@mastra/evals/llm";
174
+
175
+ // Configure the model for evaluation
176
+ const model = openai("gpt-4o-mini");
177
+
178
+ const metric = new SummarizationMetric(model);
179
+
180
+ const result = await metric.measure(
181
+ "The electric car company Tesla was founded in 2003 by Martin Eberhard and Marc Tarpenning. Elon Musk joined in 2004 as the largest investor and became CEO in 2008. The company's first car, the Roadster, was launched in 2008.",
182
+ "Tesla, founded by Elon Musk in 2003, revolutionized the electric car industry starting with the Roadster in 2008.",
183
+ );
184
+
185
+ // Example output:
186
+ // {
187
+ // score: 0.5,
188
+ // info: {
189
+ // reason: "The score is 0.5 because while the coverage is good (0.75) - mentioning the founding year,
190
+ // first car model, and launch date - the alignment score is lower (0.5) due to incorrectly
191
+ // attributing the company's founding to Elon Musk instead of Martin Eberhard and Marc Tarpenning.
192
+ // The final score takes the minimum of these two scores to ensure both factual accuracy and
193
+ // coverage are necessary for a good summary.",
194
+ // alignmentScore: 0.5,
195
+ // coverageScore: 0.75,
196
+ // }
197
+ // }
198
+ ```
199
+
200
+ ## Related
201
+
202
+ - [Faithfulness Metric](./faithfulness)
203
+ - [Completeness Metric](./completeness)
204
+ - [Contextual Recall Metric](./contextual-recall)
205
+ - [Hallucination Metric](./hallucination)
@@ -0,0 +1,161 @@
1
+ ---
2
+ title: "Reference: Textual Difference | Metrics | Evals | Mastra Docs"
3
+ description: Documentation for the Textual Difference Metric in Mastra, which measures textual differences between strings using sequence matching.
4
+ ---
5
+
6
+ # TextualDifferenceMetric
7
+
8
+ The `TextualDifferenceMetric` class uses sequence matching to measure the textual differences between two strings. It provides detailed information about changes, including the number of operations needed to transform one text into another.
9
+
10
+ ## Basic Usage
11
+
12
+ ```typescript
13
+ import { TextualDifferenceMetric } from "@mastra/evals/nlp";
14
+
15
+ const metric = new TextualDifferenceMetric();
16
+
17
+ const result = await metric.measure(
18
+ "The quick brown fox",
19
+ "The fast brown fox"
20
+ );
21
+
22
+ console.log(result.score); // Similarity ratio from 0-1
23
+ console.log(result.info); // Detailed change metrics
24
+ ```
25
+
26
+ ## measure() Parameters
27
+
28
+ <PropertiesTable
29
+ content={[
30
+ {
31
+ name: "input",
32
+ type: "string",
33
+ description: "The original text to compare against",
34
+ isOptional: false,
35
+ },
36
+ {
37
+ name: "output",
38
+ type: "string",
39
+ description: "The text to evaluate for differences",
40
+ isOptional: false,
41
+ }
42
+ ]}
43
+ />
44
+
45
+ ## Returns
46
+
47
+ <PropertiesTable
48
+ content={[
49
+ {
50
+ name: "score",
51
+ type: "number",
52
+ description: "Similarity ratio (0-1) where 1 indicates identical texts",
53
+ },
54
+ {
55
+ name: "info",
56
+ description: "Detailed metrics about the differences",
57
+ properties: [
58
+ {
59
+ type: "number",
60
+ parameters: [
61
+ {
62
+ name: "confidence",
63
+ type: "number",
64
+ description: "Confidence score based on length difference between texts (0-1)",
65
+ }
66
+ ]
67
+ },
68
+ {
69
+ type: "number",
70
+ parameters: [
71
+ {
72
+ name: "ratio",
73
+ type: "number",
74
+ description: "Raw similarity ratio between the texts",
75
+ }
76
+ ]
77
+ },
78
+ {
79
+ type: "number",
80
+ parameters: [
81
+ {
82
+ name: "changes",
83
+ type: "number",
84
+ description: "Number of change operations (insertions, deletions, replacements)",
85
+ }
86
+ ]
87
+ },
88
+ {
89
+ type: "number",
90
+ parameters: [
91
+ {
92
+ name: "lengthDiff",
93
+ type: "number",
94
+ description: "Normalized difference in length between input and output (0-1)",
95
+ }
96
+ ]
97
+ },
98
+ ]
99
+ },
100
+ ]}
101
+ />
102
+
103
+ ## Scoring Details
104
+
105
+ The metric calculates several measures:
106
+ - **Similarity Ratio**: Based on sequence matching between texts (0-1)
107
+ - **Changes**: Count of non-matching operations needed
108
+ - **Length Difference**: Normalized difference in text lengths
109
+ - **Confidence**: Inversely proportional to length difference
110
+
111
+ ### Scoring Process
112
+
113
+ 1. Analyzes textual differences:
114
+ - Performs sequence matching between input and output
115
+ - Counts the number of change operations required
116
+ - Measures length differences
117
+
118
+ 2. Calculates metrics:
119
+ - Computes similarity ratio
120
+ - Determines confidence score
121
+ - Combines into weighted score
122
+
123
+ Final score: `(similarity_ratio * confidence) * scale`
124
+
125
+ ### Score interpretation
126
+ (0 to scale, default 0-1)
127
+ - 1.0: Identical texts - no differences
128
+ - 0.7-0.9: Minor differences - few changes needed
129
+ - 0.4-0.6: Moderate differences - significant changes
130
+ - 0.1-0.3: Major differences - extensive changes
131
+ - 0.0: Completely different texts
132
+
133
+ ## Example with Analysis
134
+
135
+ ```typescript
136
+ import { TextualDifferenceMetric } from "@mastra/evals/nlp";
137
+
138
+ const metric = new TextualDifferenceMetric();
139
+
140
+ const result = await metric.measure(
141
+ "Hello world! How are you?",
142
+ "Hello there! How is it going?"
143
+ );
144
+
145
+ // Example output:
146
+ // {
147
+ // score: 0.65,
148
+ // info: {
149
+ // confidence: 0.95,
150
+ // ratio: 0.65,
151
+ // changes: 2,
152
+ // lengthDiff: 0.05
153
+ // }
154
+ // }
155
+ ```
156
+
157
+ ## Related
158
+
159
+ - [Content Similarity Metric](./content-similarity)
160
+ - [Completeness Metric](./completeness)
161
+ - [Keyword Coverage Metric](./keyword-coverage)
@@ -0,0 +1,181 @@
1
+ ---
2
+ title: "Reference: Tone Consistency | Metrics | Evals | Mastra Docs"
3
+ description: Documentation for the Tone Consistency Metric in Mastra, which evaluates emotional tone and sentiment consistency in text.
4
+ ---
5
+
6
+ # ToneConsistencyMetric
7
+
8
+ The `ToneConsistencyMetric` class evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
9
+
10
+ ## Basic Usage
11
+
12
+ ```typescript
13
+ import { ToneConsistencyMetric } from "@mastra/evals/nlp";
14
+
15
+ const metric = new ToneConsistencyMetric();
16
+
17
+ // Compare tone between input and output
18
+ const result1 = await metric.measure(
19
+ "I love this amazing product!",
20
+ "This product is wonderful and fantastic!"
21
+ );
22
+
23
+ // Analyze tone stability in a single text
24
+ const result2 = await metric.measure(
25
+ "The service is excellent. The staff is friendly. The atmosphere is perfect.",
26
+ "" // Empty string for single-text analysis
27
+ );
28
+
29
+ console.log(result1.score); // Tone consistency score from 0-1
30
+ console.log(result2.score); // Tone stability score from 0-1
31
+ ```
32
+
33
+ ## measure() Parameters
34
+
35
+ <PropertiesTable
36
+ content={[
37
+ {
38
+ name: "input",
39
+ type: "string",
40
+ description: "The text to analyze for tone",
41
+ isOptional: false,
42
+ },
43
+ {
44
+ name: "output",
45
+ type: "string",
46
+ description: "Reference text for tone comparison (empty string for stability analysis)",
47
+ isOptional: false,
48
+ }
49
+ ]}
50
+ />
51
+
52
+ ## Returns
53
+
54
+ <PropertiesTable
55
+ content={[
56
+ {
57
+ name: "score",
58
+ type: "number",
59
+ description: "Tone consistency/stability score (0-1)",
60
+ },
61
+ {
62
+ name: "info",
63
+ type: "object",
64
+ description: "Detailed tone info",
65
+ }
66
+ ]}
67
+ />
68
+
69
+ ### info Object (Tone Comparison)
70
+
71
+ <PropertiesTable
72
+ content={[
73
+ {
74
+ name: "responseSentiment",
75
+ type: "number",
76
+ description: "Sentiment score for the input text",
77
+ },
78
+ {
79
+ name: "referenceSentiment",
80
+ type: "number",
81
+ description: "Sentiment score for the output text",
82
+ },
83
+ {
84
+ name: "difference",
85
+ type: "number",
86
+ description: "Absolute difference between sentiment scores",
87
+ }
88
+ ]}
89
+ />
90
+
91
+ ### info Object (Tone Stability)
92
+
93
+ <PropertiesTable
94
+ content={[
95
+ {
96
+ name: "avgSentiment",
97
+ type: "number",
98
+ description: "Average sentiment score across sentences",
99
+ },
100
+ {
101
+ name: "sentimentVariance",
102
+ type: "number",
103
+ description: "Variance in sentiment between sentences",
104
+ }
105
+ ]}
106
+ />
107
+
108
+
109
+ ## Scoring Details
110
+
111
+ The metric evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
112
+
113
+ ### Scoring Process
114
+
115
+ 1. Analyzes tone patterns:
116
+ - Extracts sentiment features
117
+ - Computes sentiment scores
118
+ - Measures tone variations
119
+
120
+ 2. Calculates mode-specific score:
121
+ **Tone Consistency** (input and output):
122
+ - Compares sentiment between texts
123
+ - Calculates sentiment difference
124
+ - Score = 1 - (sentiment_difference / max_difference)
125
+
126
+ **Tone Stability** (single input):
127
+ - Analyzes sentiment across sentences
128
+ - Calculates sentiment variance
129
+ - Score = 1 - (sentiment_variance / max_variance)
130
+
131
+ Final score: `mode_specific_score * scale`
132
+
133
+ ### Score interpretation
134
+ (0 to scale, default 0-1)
135
+ - 1.0: Perfect tone consistency/stability
136
+ - 0.7-0.9: Strong consistency with minor variations
137
+ - 0.4-0.6: Moderate consistency with noticeable shifts
138
+ - 0.1-0.3: Poor consistency with major tone changes
139
+ - 0.0: No consistency - completely different tones
140
+
141
+ ## Example with Both Modes
142
+
143
+ ```typescript
144
+ import { ToneConsistencyMetric } from "@mastra/evals/nlp";
145
+
146
+ const metric = new ToneConsistencyMetric();
147
+
148
+ // Tone Consistency Mode
149
+ const consistencyResult = await metric.measure(
150
+ "This product is fantastic and amazing!",
151
+ "The product is excellent and wonderful!"
152
+ );
153
+ // Example output:
154
+ // {
155
+ // score: 0.95,
156
+ // info: {
157
+ // responseSentiment: 0.8,
158
+ // referenceSentiment: 0.75,
159
+ // difference: 0.05
160
+ // }
161
+ // }
162
+
163
+ // Tone Stability Mode
164
+ const stabilityResult = await metric.measure(
165
+ "Great service! Friendly staff. Perfect atmosphere.",
166
+ ""
167
+ );
168
+ // Example output:
169
+ // {
170
+ // score: 0.9,
171
+ // info: {
172
+ // avgSentiment: 0.6,
173
+ // sentimentVariance: 0.1
174
+ // }
175
+ // }
176
+ ```
177
+
178
+ ## Related
179
+
180
+ - [Content Similarity Metric](./content-similarity)
181
+ - [Toxicity Metric](./toxicity)
@@ -0,0 +1,165 @@
1
+ ---
2
+ title: "Reference: Toxicity | Metrics | Evals | Mastra Docs"
3
+ description: Documentation for the Toxicity Metric in Mastra, which evaluates LLM outputs for racist, biased, or toxic elements.
4
+ ---
5
+
6
+ # ToxicityMetric
7
+
8
+ The `ToxicityMetric` class evaluates whether an LLM's output contains racist, biased, or toxic elements. It uses a judge-based system to analyze responses for various forms of toxicity including personal attacks, mockery, hate speech, dismissive statements, and threats.
9
+
10
+ ## Basic Usage
11
+
12
+ ```typescript
13
+ import { openai } from "@ai-sdk/openai";
14
+ import { ToxicityMetric } from "@mastra/evals/llm";
15
+
16
+ // Configure the model for evaluation
17
+ const model = openai("gpt-4o-mini");
18
+
19
+ const metric = new ToxicityMetric(model, {
20
+ scale: 1, // Default scale is 0-1
21
+ });
22
+
23
+ const result = await metric.measure(
24
+ "How is Sarah as a person?",
25
+ "Sarah is a dedicated team member who always delivers quality work.",
26
+ );
27
+
28
+ console.log(result.score); // Score from 0-1 (0 = not toxic, 1 = toxic)
29
+ console.log(result.info.reason); // Explanation of the toxicity assessment
30
+ ```
31
+
32
+ ## Constructor Parameters
33
+
34
+ <PropertiesTable
35
+ content={[
36
+ {
37
+ name: "model",
38
+ type: "LanguageModel",
39
+ description: "Configuration for the model used to evaluate toxicity",
40
+ isOptional: false,
41
+ },
42
+ {
43
+ name: "options",
44
+ type: "ToxicityMetricOptions",
45
+ description: "Configuration options for the metric",
46
+ isOptional: true,
47
+ defaultValue: "{ scale: 1 }",
48
+ },
49
+ ]}
50
+ />
51
+
52
+ ### ToxicityMetricOptions
53
+
54
+ <PropertiesTable
55
+ content={[
56
+ {
57
+ name: "scale",
58
+ type: "number",
59
+ description: "Maximum score value (default is 1)",
60
+ isOptional: true,
61
+ defaultValue: "1",
62
+ },
63
+ ]}
64
+ />
65
+
66
+ ## measure() Parameters
67
+
68
+ <PropertiesTable
69
+ content={[
70
+ {
71
+ name: "input",
72
+ type: "string",
73
+ description: "The original query or prompt",
74
+ isOptional: false,
75
+ },
76
+ {
77
+ name: "output",
78
+ type: "string",
79
+ description: "The LLM's response to evaluate",
80
+ isOptional: false,
81
+ },
82
+ ]}
83
+ />
84
+
85
+ ## Returns
86
+
87
+ <PropertiesTable
88
+ content={[
89
+ {
90
+ name: "score",
91
+ type: "number",
92
+ description: "Toxicity score (0 to scale, default 0-1)",
93
+ },
94
+ {
95
+ name: "info",
96
+ type: "object",
97
+ description: "Detailed information about the toxicity assessment",
98
+ properties: [
99
+ {
100
+ type: "string",
101
+ parameters: [
102
+ {
103
+ name: "reason",
104
+ type: "string",
105
+ description: "Detailed explanation of the toxicity assessment",
106
+ },
107
+ ],
108
+ },
109
+ ],
110
+ },
111
+ ]}
112
+ />
113
+
114
+ ## Scoring Details
115
+
116
+ The metric evaluates toxicity through multiple aspects:
117
+ - Personal attacks
118
+ - Mockery or sarcasm
119
+ - Hate speech
120
+ - Dismissive statements
121
+ - Threats or intimidation
122
+
123
+ ### Scoring Process
124
+
125
+ 1. Analyzes toxic elements:
126
+ - Identifies personal attacks and mockery
127
+ - Detects hate speech and threats
128
+ - Evaluates dismissive statements
129
+ - Assesses severity levels
130
+
131
+ 2. Calculates toxicity score:
132
+ - Weighs detected elements
133
+ - Combines severity ratings
134
+ - Normalizes to scale
135
+
136
+ Final score: `(toxicity_weighted_sum / max_toxicity) * scale`
137
+
138
+ ### Score interpretation
139
+ Scores range from 0 to the configured scale (default 0-1):
140
+ - 0.8-1.0: Severe toxicity
141
+ - 0.4-0.7: Moderate toxicity
142
+ - 0.1-0.3: Mild toxicity
143
+ - 0.0: No toxic elements detected
144
+
145
+ ## Example with Custom Configuration
146
+
147
+ ```typescript
148
+ import { openai } from "@ai-sdk/openai";
149
+
150
+ const model = openai("gpt-4o-mini");
151
+
152
+ const metric = new ToxicityMetric(model, {
153
+ scale: 10, // Use 0-10 scale instead of 0-1
154
+ });
155
+
156
+ const result = await metric.measure(
157
+ "What do you think about the new team member?",
158
+ "The new team member shows promise but needs significant improvement in basic skills.",
159
+ );
160
+ ```
161
+
162
+ ## Related
163
+
164
+ - [Tone Consistency Metric](./tone-consistency)
165
+ - [Bias Metric](./bias)