@mastra/mcp-docs-server 1.0.0-beta.3 → 1.0.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +201 -1
  2. package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +201 -1
  3. package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +201 -1
  4. package/.docs/organized/changelogs/%40mastra%2Fastra.md +201 -1
  5. package/.docs/organized/changelogs/%40mastra%2Fauth.md +6 -0
  6. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +201 -1
  7. package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +201 -1
  8. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +201 -1
  9. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +201 -1
  10. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +201 -1
  11. package/.docs/organized/changelogs/%40mastra%2Fcore.md +370 -170
  12. package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +201 -1
  13. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +201 -1
  14. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +201 -1
  15. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +201 -1
  16. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +201 -1
  17. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +201 -1
  18. package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +201 -1
  19. package/.docs/organized/changelogs/%40mastra%2Fevals.md +201 -1
  20. package/.docs/organized/changelogs/%40mastra%2Flance.md +201 -1
  21. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +201 -1
  22. package/.docs/organized/changelogs/%40mastra%2Floggers.md +201 -1
  23. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +201 -1
  24. package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +201 -1
  25. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +201 -1
  26. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +201 -1
  27. package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +201 -1
  28. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +201 -1
  29. package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +201 -1
  30. package/.docs/organized/changelogs/%40mastra%2Fpg.md +201 -1
  31. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +201 -1
  32. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +201 -1
  33. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +201 -1
  34. package/.docs/organized/changelogs/%40mastra%2Frag.md +201 -1
  35. package/.docs/organized/changelogs/%40mastra%2Freact.md +80 -1
  36. package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
  37. package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +36 -0
  38. package/.docs/organized/changelogs/%40mastra%2Fserver.md +201 -1
  39. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +201 -1
  40. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +201 -1
  41. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +201 -1
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +201 -1
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +201 -1
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +201 -1
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +201 -1
  46. package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +92 -1
  47. package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +67 -1
  48. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +201 -1
  49. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +201 -1
  50. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +201 -1
  51. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +201 -1
  52. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +201 -1
  53. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +201 -1
  54. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +201 -1
  55. package/.docs/organized/changelogs/create-mastra.md +201 -1
  56. package/.docs/organized/changelogs/mastra.md +201 -1
  57. package/.docs/organized/code-examples/memory-with-processors.md +1 -1
  58. package/.docs/organized/code-examples/quick-start.md +1 -1
  59. package/.docs/raw/agents/adding-voice.mdx +55 -9
  60. package/.docs/raw/agents/guardrails.mdx +19 -20
  61. package/.docs/raw/agents/human-in-the-loop-with-tools.mdx +6 -5
  62. package/.docs/raw/agents/networks.mdx +1 -2
  63. package/.docs/raw/agents/overview.mdx +5 -5
  64. package/.docs/raw/agents/using-tools.mdx +4 -5
  65. package/.docs/raw/course/01-first-agent/05-running-playground.md +5 -5
  66. package/.docs/raw/course/01-first-agent/09-testing-your-agent.md +3 -3
  67. package/.docs/raw/course/01-first-agent/13-testing-your-tool.md +3 -3
  68. package/.docs/raw/course/01-first-agent/17-testing-memory.md +2 -2
  69. package/.docs/raw/course/04-workflows/07-using-playground.md +1 -1
  70. package/.docs/raw/deployment/building-mastra.mdx +1 -1
  71. package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +1 -1
  72. package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +1 -1
  73. package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +1 -1
  74. package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +1 -1
  75. package/.docs/raw/deployment/cloud-providers/index.mdx +1 -1
  76. package/.docs/raw/deployment/mastra-cloud/observability.mdx +19 -17
  77. package/.docs/raw/deployment/mastra-cloud/setting-up.mdx +1 -1
  78. package/.docs/raw/deployment/overview.mdx +2 -2
  79. package/.docs/raw/deployment/web-framework.mdx +5 -5
  80. package/.docs/raw/evals/custom-scorers.mdx +3 -5
  81. package/.docs/raw/evals/overview.mdx +2 -3
  82. package/.docs/raw/getting-started/project-structure.mdx +1 -1
  83. package/.docs/raw/getting-started/start.mdx +72 -0
  84. package/.docs/raw/getting-started/studio.mdx +1 -1
  85. package/.docs/raw/{frameworks/agentic-uis/ai-sdk.mdx → guides/build-your-ui/ai-sdk-ui.mdx} +105 -11
  86. package/.docs/raw/{frameworks/web-frameworks → guides/getting-started}/astro.mdx +23 -25
  87. package/.docs/raw/{frameworks/servers → guides/getting-started}/express.mdx +3 -4
  88. package/.docs/raw/guides/{guide → getting-started}/manual-install.mdx +1 -1
  89. package/.docs/raw/guides/{quickstarts/nextjs.mdx → getting-started/next-js.mdx} +11 -11
  90. package/.docs/raw/guides/{quickstarts/standalone-server.mdx → getting-started/quickstart.mdx} +7 -7
  91. package/.docs/raw/{frameworks/web-frameworks → guides/getting-started}/sveltekit.mdx +23 -25
  92. package/.docs/raw/{frameworks/web-frameworks → guides/getting-started}/vite-react.mdx +7 -7
  93. package/.docs/raw/guides/guide/ai-recruiter.mdx +2 -3
  94. package/.docs/raw/guides/guide/chef-michel.mdx +2 -3
  95. package/.docs/raw/guides/guide/notes-mcp-server.mdx +2 -2
  96. package/.docs/raw/guides/guide/research-assistant.mdx +7 -8
  97. package/.docs/raw/guides/guide/stock-agent.mdx +4 -6
  98. package/.docs/raw/guides/guide/web-search.mdx +12 -10
  99. package/.docs/raw/guides/migrations/agentnetwork.mdx +4 -4
  100. package/.docs/raw/guides/migrations/ai-sdk-v4-to-v5.mdx +1 -1
  101. package/.docs/raw/guides/migrations/upgrade-to-v1/agent.mdx +29 -0
  102. package/.docs/raw/guides/migrations/upgrade-to-v1/tools.mdx +5 -0
  103. package/.docs/raw/guides/migrations/upgrade-to-v1/workflows.mdx +22 -0
  104. package/.docs/raw/guides/migrations/vnext-to-standard-apis.mdx +2 -2
  105. package/.docs/raw/index.mdx +2 -2
  106. package/.docs/raw/mcp/overview.mdx +3 -5
  107. package/.docs/raw/memory/memory-processors.mdx +1 -2
  108. package/.docs/raw/memory/semantic-recall.mdx +7 -7
  109. package/.docs/raw/memory/storage/memory-with-libsql.mdx +2 -4
  110. package/.docs/raw/memory/storage/memory-with-mongodb.mdx +2 -4
  111. package/.docs/raw/memory/storage/memory-with-pg.mdx +2 -4
  112. package/.docs/raw/memory/storage/memory-with-upstash.mdx +2 -4
  113. package/.docs/raw/memory/threads-and-resources.mdx +3 -3
  114. package/.docs/raw/memory/working-memory.mdx +4 -5
  115. package/.docs/raw/{logging.mdx → observability/logging.mdx} +1 -1
  116. package/.docs/raw/observability/overview.mdx +2 -2
  117. package/.docs/raw/observability/tracing/exporters/otel.mdx +21 -2
  118. package/.docs/raw/observability/tracing/exporters/posthog.mdx +107 -0
  119. package/.docs/raw/observability/tracing/overview.mdx +3 -2
  120. package/.docs/raw/rag/chunking-and-embedding.mdx +16 -17
  121. package/.docs/raw/rag/overview.mdx +3 -2
  122. package/.docs/raw/rag/retrieval.mdx +20 -32
  123. package/.docs/raw/reference/agents/agent.mdx +7 -10
  124. package/.docs/raw/reference/agents/generate.mdx +11 -92
  125. package/.docs/raw/reference/agents/generateLegacy.mdx +2 -2
  126. package/.docs/raw/reference/agents/getLLM.mdx +1 -1
  127. package/.docs/raw/reference/agents/network.mdx +5 -88
  128. package/.docs/raw/reference/cli/mastra.mdx +2 -1
  129. package/.docs/raw/reference/client-js/agents.mdx +3 -3
  130. package/.docs/raw/reference/core/getLogger.mdx +1 -1
  131. package/.docs/raw/reference/core/listLogs.mdx +1 -1
  132. package/.docs/raw/reference/core/listLogsByRunId.mdx +1 -1
  133. package/.docs/raw/reference/core/mastra-model-gateway.mdx +5 -19
  134. package/.docs/raw/reference/core/setLogger.mdx +1 -1
  135. package/.docs/raw/reference/core/setTelemetry.mdx +1 -1
  136. package/.docs/raw/reference/evals/answer-relevancy.mdx +28 -98
  137. package/.docs/raw/reference/evals/answer-similarity.mdx +12 -258
  138. package/.docs/raw/reference/evals/bias.mdx +29 -87
  139. package/.docs/raw/reference/evals/completeness.mdx +31 -90
  140. package/.docs/raw/reference/evals/content-similarity.mdx +28 -88
  141. package/.docs/raw/reference/evals/context-precision.mdx +28 -130
  142. package/.docs/raw/reference/evals/context-relevance.mdx +11 -11
  143. package/.docs/raw/reference/evals/faithfulness.mdx +28 -101
  144. package/.docs/raw/reference/evals/hallucination.mdx +28 -103
  145. package/.docs/raw/reference/evals/keyword-coverage.mdx +28 -107
  146. package/.docs/raw/reference/evals/noise-sensitivity.mdx +11 -11
  147. package/.docs/raw/reference/evals/prompt-alignment.mdx +15 -15
  148. package/.docs/raw/reference/evals/textual-difference.mdx +27 -100
  149. package/.docs/raw/reference/evals/tone-consistency.mdx +25 -98
  150. package/.docs/raw/reference/evals/tool-call-accuracy.mdx +7 -7
  151. package/.docs/raw/reference/evals/toxicity.mdx +29 -92
  152. package/.docs/raw/reference/memory/memory-class.mdx +5 -7
  153. package/.docs/raw/reference/observability/tracing/exporters/posthog.mdx +132 -0
  154. package/.docs/raw/reference/processors/batch-parts-processor.mdx +1 -1
  155. package/.docs/raw/reference/processors/language-detector.mdx +1 -1
  156. package/.docs/raw/reference/processors/moderation-processor.mdx +2 -2
  157. package/.docs/raw/reference/processors/pii-detector.mdx +2 -2
  158. package/.docs/raw/reference/processors/prompt-injection-detector.mdx +1 -1
  159. package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +2 -3
  160. package/.docs/raw/reference/processors/token-limiter-processor.mdx +2 -2
  161. package/.docs/raw/reference/processors/unicode-normalizer.mdx +1 -1
  162. package/.docs/raw/reference/rag/embeddings.mdx +5 -5
  163. package/.docs/raw/reference/rag/rerank.mdx +1 -2
  164. package/.docs/raw/reference/rag/rerankWithScorer.mdx +0 -1
  165. package/.docs/raw/reference/streaming/agents/stream.mdx +11 -93
  166. package/.docs/raw/reference/templates/overview.mdx +1 -4
  167. package/.docs/raw/reference/tools/client.mdx +1 -2
  168. package/.docs/raw/reference/tools/create-tool.mdx +132 -0
  169. package/.docs/raw/reference/tools/graph-rag-tool.mdx +5 -5
  170. package/.docs/raw/reference/tools/mcp-client.mdx +2 -4
  171. package/.docs/raw/reference/tools/mcp-server.mdx +1 -2
  172. package/.docs/raw/reference/tools/vector-query-tool.mdx +14 -15
  173. package/.docs/raw/reference/vectors/chroma.mdx +81 -1
  174. package/.docs/raw/reference/vectors/couchbase.mdx +24 -17
  175. package/.docs/raw/reference/vectors/lance.mdx +38 -22
  176. package/.docs/raw/reference/vectors/libsql.mdx +35 -2
  177. package/.docs/raw/reference/vectors/mongodb.mdx +35 -2
  178. package/.docs/raw/reference/vectors/opensearch.mdx +37 -16
  179. package/.docs/raw/reference/vectors/pg.mdx +43 -36
  180. package/.docs/raw/reference/vectors/pinecone.mdx +48 -1
  181. package/.docs/raw/reference/vectors/qdrant.mdx +36 -1
  182. package/.docs/raw/reference/vectors/turbopuffer.mdx +74 -0
  183. package/.docs/raw/reference/voice/composite-voice.mdx +71 -28
  184. package/.docs/raw/reference/voice/openai-realtime.mdx +2 -2
  185. package/.docs/raw/reference/voice/voice.addInstructions.mdx +2 -3
  186. package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
  187. package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
  188. package/.docs/raw/reference/voice/voice.close.mdx +1 -1
  189. package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
  190. package/.docs/raw/reference/voice/voice.listen.mdx +86 -52
  191. package/.docs/raw/reference/voice/voice.off.mdx +1 -1
  192. package/.docs/raw/reference/voice/voice.on.mdx +1 -1
  193. package/.docs/raw/reference/voice/voice.send.mdx +1 -1
  194. package/.docs/raw/reference/voice/voice.speak.mdx +75 -40
  195. package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
  196. package/.docs/raw/server-db/mastra-client.mdx +1 -2
  197. package/.docs/raw/streaming/overview.mdx +20 -9
  198. package/.docs/raw/streaming/tool-streaming.mdx +47 -4
  199. package/.docs/raw/tools-mcp/advanced-usage.mdx +1 -2
  200. package/.docs/raw/tools-mcp/mcp-overview.mdx +3 -5
  201. package/.docs/raw/voice/overview.mdx +87 -40
  202. package/.docs/raw/voice/speech-to-speech.mdx +4 -4
  203. package/.docs/raw/voice/speech-to-text.mdx +1 -2
  204. package/.docs/raw/voice/text-to-speech.mdx +1 -2
  205. package/.docs/raw/workflows/control-flow.mdx +180 -0
  206. package/.docs/raw/workflows/overview.mdx +1 -1
  207. package/CHANGELOG.md +17 -0
  208. package/dist/{chunk-5NJC7NRO.js → chunk-4CM2BQNP.js} +24 -4
  209. package/dist/prepare-docs/package-changes.d.ts.map +1 -1
  210. package/dist/prepare-docs/prepare.js +1 -1
  211. package/dist/stdio.js +1 -1
  212. package/package.json +7 -7
  213. package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +0 -102
  214. package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +0 -179
  215. package/.docs/raw/frameworks/web-frameworks/next-js.mdx +0 -379
  216. package/.docs/raw/getting-started/quickstart.mdx +0 -27
  217. package/.docs/raw/getting-started/templates.mdx +0 -73
  218. package/.docs/raw/{frameworks/agentic-uis → guides/build-your-ui}/assistant-ui.mdx +0 -0
  219. package/.docs/raw/{frameworks/agentic-uis → guides/build-your-ui}/copilotkit.mdx +0 -0
@@ -83,118 +83,45 @@ A textual difference score between 0 and 1:
83
83
  - **0.1–0.3**: Major differences – extensive changes needed.
84
84
  - **0.0**: Completely different texts.
85
85
 
86
- ## Examples
86
+ ## Example
87
87
 
88
- ### No differences example
88
+ Measure textual differences between expected and actual agent outputs:
89
89
 
90
- In this example, the texts are exactly the same. The scorer identifies complete similarity with a perfect score and no detected changes.
91
-
92
- ```typescript title="src/example-no-differences.ts" showLineNumbers copy
90
+ ```typescript title="src/example-textual-difference.ts" showLineNumbers copy
91
+ import { runEvals } from "@mastra/core/evals";
93
92
  import { createTextualDifferenceScorer } from "@mastra/evals/scorers/prebuilt";
93
+ import { myAgent } from "./agent";
94
94
 
95
95
  const scorer = createTextualDifferenceScorer();
96
96
 
97
- const input = "The quick brown fox jumps over the lazy dog";
98
- const output = "The quick brown fox jumps over the lazy dog";
99
-
100
- const result = await scorer.run({
101
- input: [{ role: "user", content: input }],
102
- output: { role: "assistant", text: output },
103
- });
104
-
105
- console.log("Score:", result.score);
106
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
107
- ```
108
-
109
- #### No differences output
110
-
111
- The scorer returns a high score, indicating the texts are identical. The detailed info confirms zero changes and no length difference.
112
-
113
- ```typescript
114
- {
115
- score: 1,
116
- analyzeStepResult: {
117
- confidence: 1,
118
- ratio: 1,
119
- changes: 0,
120
- lengthDiff: 0,
97
+ const result = await runEvals({
98
+ data: [
99
+ {
100
+ input: "Summarize the concept of recursion",
101
+ groundTruth:
102
+ "Recursion is when a function calls itself to solve a problem by breaking it into smaller subproblems.",
103
+ },
104
+ {
105
+ input: "What is the capital of France?",
106
+ groundTruth: "The capital of France is Paris.",
107
+ },
108
+ ],
109
+ scorers: [scorer],
110
+ target: myAgent,
111
+ onItemComplete: ({ scorerResults }) => {
112
+ console.log({
113
+ score: scorerResults[scorer.id].score,
114
+ groundTruth: scorerResults[scorer.id].groundTruth,
115
+ });
121
116
  },
122
- }
123
- ```
124
-
125
- ### Minor differences example
126
-
127
- In this example, the texts have small variations. The scorer detects these minor differences and returns a moderate similarity score.
128
-
129
- ```typescript title="src/example-minor-differences.ts" showLineNumbers copy
130
- import { createTextualDifferenceScorer } from "@mastra/evals/scorers/prebuilt";
131
-
132
- const scorer = createTextualDifferenceScorer();
133
-
134
- const input = "Hello world! How are you?";
135
- const output = "Hello there! How is it going?";
136
-
137
- const result = await scorer.run({
138
- input: [{ role: "user", content: input }],
139
- output: { role: "assistant", text: output },
140
117
  });
141
118
 
142
- console.log("Score:", result.score);
143
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
119
+ console.log(result.scores);
144
120
  ```
145
121
 
146
- #### Minor differences output
122
+ For more details on `runEvals`, see the [runEvals reference](/reference/v1/evals/run-evals).
147
123
 
148
- The scorer returns a moderate score reflecting the small variations between the texts. The detailed info includes the number of changes and length difference observed.
149
-
150
- ```typescript
151
- {
152
- score: 0.5925925925925926,
153
- analyzeStepResult: {
154
- confidence: 0.8620689655172413,
155
- ratio: 0.5925925925925926,
156
- changes: 5,
157
- lengthDiff: 0.13793103448275862
158
- }
159
- }
160
- ```
161
-
162
- ### Major differences example
163
-
164
- In this example, the texts differ significantly. The scorer detects extensive changes and returns a low similarity score.
165
-
166
- ```typescript title="src/example-major-differences.ts" showLineNumbers copy
167
- import { createTextualDifferenceScorer } from "@mastra/evals/scorers/prebuilt";
168
-
169
- const scorer = createTextualDifferenceScorer();
170
-
171
- const input = "Python is a high-level programming language";
172
- const output = "JavaScript is used for web development";
173
-
174
- const result = await scorer.run({
175
- input: [{ role: "user", content: input }],
176
- output: { role: "assistant", text: output },
177
- });
178
-
179
- console.log("Score:", result.score);
180
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
181
- ```
182
-
183
- #### Major differences output
184
-
185
- The scorer returns a low score due to significant differences between the texts. The detailed `analyzeStepResult` shows numerous changes and a notable length difference.
186
-
187
- ```typescript
188
- {
189
- score: 0.3170731707317073,
190
- analyzeStepResult: {
191
- confidence: 0.8636363636363636,
192
- ratio: 0.3170731707317073,
193
- changes: 8,
194
- lengthDiff: 0.13636363636363635
195
- }
196
- }
197
- ```
124
+ To add this scorer to an agent, see the [Scorers overview](/docs/v1/evals/overview#adding-scorers-to-agents) guide.
198
125
 
199
126
  ## Related
200
127
 
@@ -94,116 +94,43 @@ Object with tone metrics:
94
94
  - **avgSentiment**: Average sentiment across sentences (stability mode).
95
95
  - **sentimentVariance**: Variance of sentiment across sentences (stability mode).
96
96
 
97
- ## Examples
97
+ ## Example
98
98
 
99
- ### Positive tone example
99
+ Evaluate tone consistency between related agent responses:
100
100
 
101
- In this example, the texts exhibit a similar positive sentiment. The scorer measures the consistency between the tones, resulting in a high score.
102
-
103
- ```typescript title="src/example-positive-tone.ts" showLineNumbers copy
104
- import { createToneScorer } from "@mastra/evals/scorers/prebuilt";
105
-
106
- const scorer = createToneScorer();
107
-
108
- const input = "This product is fantastic and amazing!";
109
- const output = "The product is excellent and wonderful!";
110
-
111
- const result = await scorer.run({
112
- input: [{ role: "user", content: input }],
113
- output: { role: "assistant", text: output },
114
- });
115
-
116
- console.log("Score:", result.score);
117
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
118
- ```
119
-
120
- #### Positive tone output
121
-
122
- The scorer returns a high score reflecting strong sentiment alignment. The `analyzeStepResult` field provides sentiment values and the difference between them.
123
-
124
- ```typescript
125
- {
126
- score: 0.8333333333333335,
127
- analyzeStepResult: {
128
- responseSentiment: 1.3333333333333333,
129
- referenceSentiment: 1.1666666666666667,
130
- difference: 0.16666666666666652,
131
- },
132
- }
133
- ```
134
-
135
- ### Stable tone example
136
-
137
- In this example, the text’s internal tone consistency is analyzed by passing an empty response. This signals the scorer to evaluate sentiment stability within the single input text, resulting in a score reflecting how uniform the tone is throughout.
138
-
139
- ```typescript title="src/example-stable-tone.ts" showLineNumbers copy
101
+ ```typescript title="src/example-tone-consistency.ts" showLineNumbers copy
102
+ import { runEvals } from "@mastra/core/evals";
140
103
  import { createToneScorer } from "@mastra/evals/scorers/prebuilt";
104
+ import { myAgent } from "./agent";
141
105
 
142
106
  const scorer = createToneScorer();
143
107
 
144
- const input = "Great service! Friendly staff. Perfect atmosphere.";
145
- const output = "";
146
-
147
- const result = await scorer.run({
148
- input: [{ role: "user", content: input }],
149
- output: { role: "assistant", text: output },
150
- });
151
-
152
- console.log("Score:", result.score);
153
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
154
- ```
155
-
156
- #### Stable tone output
157
-
158
- The scorer returns a high score indicating consistent sentiment throughout the input text. The `analyzeStepResult` field includes the average sentiment and sentiment variance, reflecting tone stability.
159
-
160
- ```typescript
161
- {
162
- score: 0.9444444444444444,
163
- analyzeStepResult: {
164
- avgSentiment: 1.3333333333333333,
165
- sentimentVariance: 0.05555555555555556,
108
+ const result = await runEvals({
109
+ data: [
110
+ {
111
+ input: "How was your experience with our service?",
112
+ groundTruth: "The service was excellent and exceeded expectations!",
113
+ },
114
+ {
115
+ input: "Tell me about the customer support",
116
+ groundTruth: "The support team was friendly and very helpful.",
117
+ },
118
+ ],
119
+ scorers: [scorer],
120
+ target: myAgent,
121
+ onItemComplete: ({ scorerResults }) => {
122
+ console.log({
123
+ score: scorerResults[scorer.id].score,
124
+ });
166
125
  },
167
- }
168
- ```
169
-
170
- ### Mixed tone example
171
-
172
- In this example, the input and response have different emotional tones. The scorer picks up on these variations and gives a lower consistency score.
173
-
174
- ```typescript title="src/example-mixed-tone.ts" showLineNumbers copy
175
- import { createToneScorer } from "@mastra/evals/scorers/prebuilt";
176
-
177
- const scorer = createToneScorer();
178
-
179
- const input =
180
- "The interface is frustrating and confusing, though it has potential.";
181
- const output =
182
- "The design shows promise but needs significant improvements to be usable.";
183
-
184
- const result = await scorer.run({
185
- input: [{ role: "user", content: input }],
186
- output: { role: "assistant", text: output },
187
126
  });
188
127
 
189
- console.log("Score:", result.score);
190
- console.log("AnalyzeStepResult:", result.analyzeStepResult);
128
+ console.log(result.scores);
191
129
  ```
192
130
 
193
- #### Mixed tone output
194
-
195
- The scorer returns a low score due to the noticeable differences in emotional tone. The `analyzeStepResult` field highlights the sentiment values and the degree of variation between them.
131
+ For more details on `runEvals`, see the [runEvals reference](/reference/v1/evals/run-evals).
196
132
 
197
- ```typescript
198
- {
199
- score: 0.4181818181818182,
200
- analyzeStepResult: {
201
- responseSentiment: -0.4,
202
- referenceSentiment: 0.18181818181818182,
203
- difference: 0.5818181818181818,
204
- },
205
- }
206
- ```
133
+ To add this scorer to an agent, see the [Scorers overview](/docs/v1/evals/overview#adding-scorers-to-agents) guide.
207
134
 
208
135
  ## Related
209
136
 
@@ -349,7 +349,7 @@ The LLM-based scorer provides:
349
349
  ```typescript showLineNumbers copy
350
350
  // Basic configuration
351
351
  const basicLLMScorer = createLLMScorer({
352
- model: 'openai/gpt-4o-mini',
352
+ model: 'openai/gpt-5.1',
353
353
  availableTools: [
354
354
  { name: 'tool1', description: 'Description 1' },
355
355
  { name: 'tool2', description: 'Description 2' }
@@ -358,7 +358,7 @@ const basicLLMScorer = createLLMScorer({
358
358
 
359
359
  // With different model
360
360
  const customModelScorer = createLLMScorer({
361
- model: openai('gpt-4'), // More powerful model for complex evaluations
361
+ model: 'openai/gpt-5', // More powerful model for complex evaluations
362
362
  availableTools: [...]
363
363
  });
364
364
  ```
@@ -389,7 +389,7 @@ The LLM-based scorer uses AI to evaluate whether tool selections are appropriate
389
389
 
390
390
  ```typescript title="src/example-llm-basic.ts" showLineNumbers copy
391
391
  const llmScorer = createToolCallAccuracyScorerLLM({
392
- model: "openai/gpt-4o-mini",
392
+ model: "openai/gpt-5.1",
393
393
  availableTools: [
394
394
  {
395
395
  name: "weather-tool",
@@ -510,9 +510,9 @@ console.log(result.reason); // "The agent appropriately asked for clarification
510
510
  Here's an example using both scorers on the same data:
511
511
 
512
512
  ```typescript title="src/example-comparison.ts" showLineNumbers copy
513
- import {
514
- createToolCallAccuracyScorerCode as createCodeScorer,
515
- createToolCallAccuracyScorerLLM as createLLMScorer
513
+ import {
514
+ createToolCallAccuracyScorerCode as createCodeScorer,
515
+ createToolCallAccuracyScorerLLM as createLLMScorer
516
516
  } from "@mastra/evals/scorers/prebuilt";
517
517
 
518
518
  // Setup both scorers
@@ -522,7 +522,7 @@ const codeScorer = createCodeScorer({
522
522
  });
523
523
 
524
524
  const llmScorer = createLLMScorer({
525
- model: "openai/gpt-4o-mini",
525
+ model: "openai/gpt-5.1",
526
526
  availableTools: [
527
527
  { name: "weather-tool", description: "Get weather information" },
528
528
  { name: "search-tool", description: "Search the web" },
@@ -119,108 +119,45 @@ A toxicity score between 0 and 1:
119
119
  - **0.1–0.3**: Mild toxicity.
120
120
  - **0.0**: No toxic elements detected.
121
121
 
122
- ## Examples
122
+ ## Example
123
123
 
124
- ### High toxicity example
124
+ Evaluate agent responses for toxic, biased, or harmful content:
125
125
 
126
- In this example, the response contains explicit personal attacks and harmful language, resulting in a high toxicity score.
127
-
128
- ```typescript title="src/example-high-toxicity.ts" showLineNumbers copy
129
- import { createToxicityScorer } from "@mastra/evals/scorers/prebuilt";
130
-
131
- const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
132
-
133
- const query = "What do you think about the new team member?";
134
- const response =
135
- "That incompetent fool is a complete waste of space. They clearly bought their degree and have no business being here. Everyone hates working with them.";
136
-
137
- const result = await scorer.run({
138
- input: [{ role: "user", content: query }],
139
- output: { role: "assistant", text: response },
140
- });
141
-
142
- console.log("Score:", result.score);
143
- console.log("Reason:", result.reason);
144
- ```
145
-
146
- #### High toxicity output
147
-
148
- The scorer returns a high score indicating severe toxic content. The `reason` field details the presence of personal attacks, derogatory language, and harmful generalizations.
149
-
150
- ```typescript
151
- {
152
- score: 1,
153
- reason: "The score is 1 because the output contains personal attacks such as 'incompetent fool' and 'complete waste of space', along with derogatory assumptions about the new team member's qualifications, stating they 'clearly bought their degree'. Additionally, it generalizes the feelings of others negatively by claiming 'Everyone hates working with them', which contributes to a toxic environment."
154
- }
155
- ```
156
-
157
- ### Mixed toxicity example
158
-
159
- In this example, the response combines toxic and professional elements. The scorer identifies the problematic language while acknowledging the constructive feedback, resulting in a moderate toxicity score.
160
-
161
- ```typescript title="src/example-mixed-toxicity.ts" showLineNumbers copy
162
- import { createToxicityScorer } from "@mastra/evals/scorers/prebuilt";
163
-
164
- const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
165
-
166
- const query = "How was the meeting discussion?";
167
- const response = [
168
- "Half the meeting was just listening to the marketing team ramble on with their usual clueless suggestions.",
169
- "At least the engineering team's presentation was focused and had some solid technical solutions we can actually use.",
170
- ];
171
-
172
- const result = await scorer.run({
173
- input: [{ role: "user", content: query }],
174
- output: { role: "assistant", text: response },
175
- });
176
-
177
- console.log("Score:", result.score);
178
- console.log("Reason:", result.reason);
179
- ```
180
-
181
- #### Mixed toxicity output
182
-
183
- The scorer returns a moderate score reflecting a combination of dismissive language and professional feedback. The `reason` field explains the presence of both toxic and constructive elements in the response.
184
-
185
- ```typescript
186
- {
187
- score: 0.5,
188
- reason: "The score is 0.5 because the output contains some dismissive language towards the marketing team but maintains professional and constructive comments about the engineering team."
189
- }
190
- ```
191
-
192
- ### No toxicity example
193
-
194
- In this example, the response is professional and constructive, with no toxic or harmful language detected.
195
-
196
- ```typescript title="src/example-no-toxicity.ts" showLineNumbers copy
126
+ ```typescript title="src/example-toxicity.ts" showLineNumbers copy
127
+ import { runEvals } from "@mastra/core/evals";
197
128
  import { createToxicityScorer } from "@mastra/evals/scorers/prebuilt";
129
+ import { myAgent } from "./agent";
198
130
 
199
- const scorer = createToxicityScorer({ model: "openai/gpt-4o-mini" });
200
-
201
- const query = "Can you provide feedback on the project proposal?";
202
- const response =
203
- "The proposal has strong points in its technical approach but could benefit from more detailed market analysis. I suggest we collaborate with the research team to strengthen these sections.";
131
+ const scorer = createToxicityScorer({ model: "openai/gpt-4o" });
204
132
 
205
- const result = await scorer.run({
206
- input: [{ role: "user", content: query }],
207
- output: { role: "assistant", text: response },
133
+ const result = await runEvals({
134
+ data: [
135
+ {
136
+ input: "What do you think about the new team member?",
137
+ },
138
+ {
139
+ input: "How was the meeting discussion?",
140
+ },
141
+ {
142
+ input: "Can you provide feedback on the project proposal?",
143
+ },
144
+ ],
145
+ scorers: [scorer],
146
+ target: myAgent,
147
+ onItemComplete: ({ scorerResults }) => {
148
+ console.log({
149
+ score: scorerResults[scorer.id].score,
150
+ reason: scorerResults[scorer.id].reason,
151
+ });
152
+ },
208
153
  });
209
154
 
210
- console.log("Score:", result.score);
211
- console.log("Reason:", result.reason);
155
+ console.log(result.scores);
212
156
  ```
213
157
 
214
- #### No toxicity output
215
-
216
- The scorer returns a low score indicating the response is free from toxic content. The `reason` field confirms the professional and respectful nature of the feedback.
158
+ For more details on `runEvals`, see the [runEvals reference](/reference/v1/evals/run-evals).
217
159
 
218
- ```typescript
219
- {
220
- score: 0,
221
- reason: 'The score is 0 because the output provides constructive feedback on the project proposal, highlighting both strengths and areas for improvement. It uses respectful language and encourages collaboration, making it a non-toxic contribution.'
222
- }
223
- ```
160
+ To add this scorer to an agent, see the [Scorers overview](/docs/v1/evals/overview#adding-scorers-to-agents) guide.
224
161
 
225
162
  ## Related
226
163
 
@@ -12,12 +12,11 @@ The `Memory` class provides a robust system for managing conversation history an
12
12
  ```typescript title="src/mastra/agents/test-agent.ts" showLineNumbers copy
13
13
  import { Memory } from "@mastra/memory";
14
14
  import { Agent } from "@mastra/core/agent";
15
- import { openai } from "@ai-sdk/openai";
16
15
 
17
16
  export const agent = new Agent({
18
17
  name: "test-agent",
19
18
  instructions: "You are an agent with memory.",
20
- model: openai("gpt-4o"),
19
+ model: "openai/gpt-5.1",
21
20
  memory: new Memory({
22
21
  options: {
23
22
  workingMemory: {
@@ -128,13 +127,12 @@ export const agent = new Agent({
128
127
  ```typescript title="src/mastra/agents/test-agent.ts" showLineNumbers copy
129
128
  import { Memory } from "@mastra/memory";
130
129
  import { Agent } from "@mastra/core/agent";
131
- import { openai } from "@ai-sdk/openai";
132
130
  import { LibSQLStore, LibSQLVector } from "@mastra/libsql";
133
131
 
134
132
  export const agent = new Agent({
135
133
  name: "test-agent",
136
134
  instructions: "You are an agent with memory.",
137
- model: openai("gpt-4o"),
135
+ model: "openai/gpt-5.1",
138
136
  memory: new Memory({
139
137
  storage: new LibSQLStore({
140
138
  id: 'test-agent-storage',
@@ -167,13 +165,13 @@ export const agent = new Agent({
167
165
  ```typescript title="src/mastra/agents/pg-agent.ts" showLineNumbers copy
168
166
  import { Memory } from "@mastra/memory";
169
167
  import { Agent } from "@mastra/core/agent";
170
- import { openai } from "@ai-sdk/openai";
168
+ import { ModelRouterEmbeddingModel } from "@mastra/core/llm";
171
169
  import { PgStore, PgVector } from "@mastra/pg";
172
170
 
173
171
  export const agent = new Agent({
174
172
  name: "pg-agent",
175
173
  instructions: "You are an agent with optimized PostgreSQL memory.",
176
- model: openai("gpt-4o"),
174
+ model: "openai/gpt-5.1",
177
175
  memory: new Memory({
178
176
  storage: new PgStore({
179
177
  id: 'pg-agent-storage',
@@ -183,7 +181,7 @@ export const agent = new Agent({
183
181
  id: 'pg-agent-vector',
184
182
  connectionString: process.env.DATABASE_URL,
185
183
  }),
186
- embedder: openai.embedding("text-embedding-3-small"),
184
+ embedder: new ModelRouterEmbeddingModel("openai/text-embedding-3-small"),
187
185
  options: {
188
186
  lastMessages: 20,
189
187
  semanticRecall: {
@@ -0,0 +1,132 @@
1
+ ---
2
+ title: "Reference: PosthogExporter | Observability"
3
+ description: PostHog exporter for Tracing
4
+ ---
5
+
6
+ import PropertiesTable from "@site/src/components/PropertiesTable";
7
+
8
+ # PosthogExporter
9
+
10
+ Sends Tracing data to PostHog for AI observability and analytics.
11
+
12
+ ## Constructor
13
+
14
+ ```typescript
15
+ new PosthogExporter(config: PosthogExporterConfig)
16
+ ```
17
+
18
+ ## PosthogExporterConfig
19
+
20
+ ```typescript
21
+ interface PosthogExporterConfig extends BaseExporterConfig {
22
+ apiKey: string;
23
+ host?: string;
24
+ flushAt?: number;
25
+ flushInterval?: number;
26
+ serverless?: boolean;
27
+ defaultDistinctId?: string;
28
+ enablePrivacyMode?: boolean;
29
+ }
30
+ ```
31
+
32
+ Extends `BaseExporterConfig`, which includes:
33
+ - `logger?: IMastraLogger` - Logger instance
34
+ - `logLevel?: LogLevel | 'debug' | 'info' | 'warn' | 'error'` - Log level (default: 'info')
35
+
36
+ <PropertiesTable
37
+ props={[
38
+ {
39
+ name: "apiKey",
40
+ type: "string",
41
+ description: "PostHog project API key",
42
+ required: true,
43
+ },
44
+ {
45
+ name: "host",
46
+ type: "string",
47
+ description: "PostHog host URL (default: 'https://us.i.posthog.com')",
48
+ required: false,
49
+ },
50
+ {
51
+ name: "flushAt",
52
+ type: "number",
53
+ description: "Batch size before auto-flush (default: 20, serverless: 10)",
54
+ required: false,
55
+ },
56
+ {
57
+ name: "flushInterval",
58
+ type: "number",
59
+ description: "Flush interval in milliseconds (default: 10000, serverless: 2000)",
60
+ required: false,
61
+ },
62
+ {
63
+ name: "serverless",
64
+ type: "boolean",
65
+ description: "Auto-configure for serverless environments (default: false)",
66
+ required: false,
67
+ },
68
+ {
69
+ name: "defaultDistinctId",
70
+ type: "string",
71
+ description: "Fallback user identifier if no userId in metadata (default: 'anonymous')",
72
+ required: false,
73
+ },
74
+ {
75
+ name: "enablePrivacyMode",
76
+ type: "boolean",
77
+ description: "Exclude input/output from generation events (default: false)",
78
+ required: false,
79
+ },
80
+ {
81
+ name: "logLevel",
82
+ type: "LogLevel | 'debug' | 'info' | 'warn' | 'error'",
83
+ description: "Logger level (default: 'info')",
84
+ required: false,
85
+ },
86
+ ]}
87
+ />
88
+
89
+ ## Methods
90
+
91
+ ### exportTracingEvent
92
+
93
+ ```typescript
94
+ async exportTracingEvent(event: TracingEvent): Promise<void>
95
+ ```
96
+
97
+ Exports a tracing event to PostHog.
98
+
99
+ ### shutdown
100
+
101
+ ```typescript
102
+ async shutdown(): Promise<void>
103
+ ```
104
+
105
+ Flushes pending batched events and shuts down the PostHog client.
106
+
107
+ ## Usage
108
+
109
+ ```typescript
110
+ import { PosthogExporter } from "@mastra/posthog";
111
+
112
+ const exporter = new PosthogExporter({
113
+ apiKey: process.env.POSTHOG_API_KEY!,
114
+ host: "https://us.i.posthog.com",
115
+ serverless: true,
116
+ });
117
+ ```
118
+
119
+ ## Span Type Mapping
120
+
121
+ | Mastra Span Type | PostHog Event Type |
122
+ | ------------------- | ------------------ |
123
+ | `MODEL_GENERATION` | `$ai_generation` |
124
+ | `MODEL_STEP` | `$ai_generation` |
125
+ | `MODEL_CHUNK` | `$ai_span` |
126
+ | `TOOL_CALL` | `$ai_span` |
127
+ | `MCP_TOOL_CALL` | `$ai_span` |
128
+ | `PROCESSOR_RUN` | `$ai_span` |
129
+ | `AGENT_RUN` | `$ai_span` |
130
+ | `WORKFLOW_RUN` | `$ai_span` |
131
+ | All other workflow span types | `$ai_span` |
132
+ | `GENERIC` | `$ai_span` |
@@ -100,7 +100,7 @@ import { BatchPartsProcessor } from "@mastra/core/processors";
100
100
  export const agent = new Agent({
101
101
  name: "batched-agent",
102
102
  instructions: "You are a helpful assistant",
103
- model: "openai/gpt-4o-mini",
103
+ model: "openai/gpt-5.1",
104
104
  outputProcessors: [
105
105
  new BatchPartsProcessor({
106
106
  batchSize: 5,
@@ -136,7 +136,7 @@ import { LanguageDetector } from "@mastra/core/processors";
136
136
  export const agent = new Agent({
137
137
  name: "multilingual-agent",
138
138
  instructions: "You are a helpful assistant",
139
- model: "openai/gpt-4o-mini",
139
+ model: "openai/gpt-5.1",
140
140
  inputProcessors: [
141
141
  new LanguageDetector({
142
142
  model: "openai/gpt-4.1-nano",