@mastra/mcp-docs-server 1.1.5 → 1.1.6-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (556)
  1. package/.docs/docs/agents/adding-voice.md +349 -0
  2. package/.docs/docs/agents/agent-approval.md +558 -0
  3. package/.docs/docs/agents/agent-memory.md +209 -0
  4. package/.docs/docs/agents/guardrails.md +374 -0
  5. package/.docs/docs/agents/network-approval.md +275 -0
  6. package/.docs/docs/agents/networks.md +299 -0
  7. package/.docs/docs/agents/overview.md +304 -0
  8. package/.docs/docs/agents/processors.md +622 -0
  9. package/.docs/docs/agents/structured-output.md +273 -0
  10. package/.docs/docs/agents/supervisor-agents.md +304 -0
  11. package/.docs/docs/agents/using-tools.md +214 -0
  12. package/.docs/docs/build-with-ai/mcp-docs-server.md +238 -0
  13. package/.docs/docs/build-with-ai/skills.md +35 -0
  14. package/.docs/docs/community/contributing-templates.md +3 -0
  15. package/.docs/docs/community/discord.md +9 -0
  16. package/.docs/docs/community/licensing.md +66 -0
  17. package/.docs/docs/deployment/cloud-providers.md +15 -0
  18. package/.docs/docs/deployment/mastra-server.md +122 -0
  19. package/.docs/docs/deployment/monorepo.md +142 -0
  20. package/.docs/docs/deployment/overview.md +62 -0
  21. package/.docs/docs/deployment/studio.md +239 -0
  22. package/.docs/docs/deployment/web-framework.md +52 -0
  23. package/.docs/docs/deployment/workflow-runners.md +9 -0
  24. package/.docs/docs/evals/built-in-scorers.md +47 -0
  25. package/.docs/docs/evals/custom-scorers.md +519 -0
  26. package/.docs/docs/evals/overview.md +141 -0
  27. package/.docs/docs/evals/running-in-ci.md +124 -0
  28. package/.docs/docs/getting-started/build-with-ai.md +68 -0
  29. package/.docs/docs/getting-started/manual-install.md +226 -0
  30. package/.docs/docs/getting-started/project-structure.md +60 -0
  31. package/.docs/docs/getting-started/start.md +28 -0
  32. package/.docs/docs/getting-started/studio.md +101 -0
  33. package/.docs/docs/index.md +43 -0
  34. package/.docs/docs/mastra-cloud/deployment.md +77 -0
  35. package/.docs/docs/mastra-cloud/observability.md +38 -0
  36. package/.docs/docs/mastra-cloud/overview.md +23 -0
  37. package/.docs/docs/mastra-cloud/setup.md +42 -0
  38. package/.docs/docs/mastra-cloud/studio.md +24 -0
  39. package/.docs/docs/mastra-code/configuration.md +299 -0
  40. package/.docs/docs/mastra-code/customization.md +228 -0
  41. package/.docs/docs/mastra-code/modes.md +104 -0
  42. package/.docs/docs/mastra-code/overview.md +135 -0
  43. package/.docs/docs/mastra-code/tools.md +229 -0
  44. package/.docs/docs/mcp/overview.md +373 -0
  45. package/.docs/docs/mcp/publishing-mcp-server.md +95 -0
  46. package/.docs/docs/memory/memory-processors.md +314 -0
  47. package/.docs/docs/memory/message-history.md +260 -0
  48. package/.docs/docs/memory/observational-memory.md +248 -0
  49. package/.docs/docs/memory/overview.md +45 -0
  50. package/.docs/docs/memory/semantic-recall.md +272 -0
  51. package/.docs/docs/memory/storage.md +261 -0
  52. package/.docs/docs/memory/working-memory.md +400 -0
  53. package/.docs/docs/observability/datasets/overview.md +198 -0
  54. package/.docs/docs/observability/datasets/running-experiments.md +274 -0
  55. package/.docs/docs/observability/logging.md +99 -0
  56. package/.docs/docs/observability/overview.md +70 -0
  57. package/.docs/docs/observability/tracing/bridges/otel.md +209 -0
  58. package/.docs/docs/observability/tracing/exporters/arize.md +272 -0
  59. package/.docs/docs/observability/tracing/exporters/braintrust.md +111 -0
  60. package/.docs/docs/observability/tracing/exporters/cloud.md +127 -0
  61. package/.docs/docs/observability/tracing/exporters/datadog.md +187 -0
  62. package/.docs/docs/observability/tracing/exporters/default.md +209 -0
  63. package/.docs/docs/observability/tracing/exporters/laminar.md +100 -0
  64. package/.docs/docs/observability/tracing/exporters/langfuse.md +213 -0
  65. package/.docs/docs/observability/tracing/exporters/langsmith.md +198 -0
  66. package/.docs/docs/observability/tracing/exporters/otel.md +476 -0
  67. package/.docs/docs/observability/tracing/exporters/posthog.md +148 -0
  68. package/.docs/docs/observability/tracing/exporters/sentry.md +208 -0
  69. package/.docs/docs/observability/tracing/overview.md +1112 -0
  70. package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +300 -0
  71. package/.docs/docs/rag/chunking-and-embedding.md +183 -0
  72. package/.docs/docs/rag/graph-rag.md +215 -0
  73. package/.docs/docs/rag/overview.md +72 -0
  74. package/.docs/docs/rag/retrieval.md +515 -0
  75. package/.docs/docs/rag/vector-databases.md +645 -0
  76. package/.docs/docs/server/auth/auth0.md +220 -0
  77. package/.docs/docs/server/auth/better-auth.md +203 -0
  78. package/.docs/docs/server/auth/clerk.md +132 -0
  79. package/.docs/docs/server/auth/composite-auth.md +234 -0
  80. package/.docs/docs/server/auth/custom-auth-provider.md +513 -0
  81. package/.docs/docs/server/auth/firebase.md +272 -0
  82. package/.docs/docs/server/auth/jwt.md +110 -0
  83. package/.docs/docs/server/auth/simple-auth.md +180 -0
  84. package/.docs/docs/server/auth/supabase.md +117 -0
  85. package/.docs/docs/server/auth/workos.md +186 -0
  86. package/.docs/docs/server/auth.md +38 -0
  87. package/.docs/docs/server/custom-adapters.md +378 -0
  88. package/.docs/docs/server/custom-api-routes.md +267 -0
  89. package/.docs/docs/server/mastra-client.md +243 -0
  90. package/.docs/docs/server/mastra-server.md +71 -0
  91. package/.docs/docs/server/middleware.md +225 -0
  92. package/.docs/docs/server/request-context.md +471 -0
  93. package/.docs/docs/server/server-adapters.md +547 -0
  94. package/.docs/docs/streaming/events.md +237 -0
  95. package/.docs/docs/streaming/overview.md +175 -0
  96. package/.docs/docs/streaming/tool-streaming.md +175 -0
  97. package/.docs/docs/streaming/workflow-streaming.md +109 -0
  98. package/.docs/docs/voice/overview.md +959 -0
  99. package/.docs/docs/voice/speech-to-speech.md +102 -0
  100. package/.docs/docs/voice/speech-to-text.md +79 -0
  101. package/.docs/docs/voice/text-to-speech.md +83 -0
  102. package/.docs/docs/workflows/agents-and-tools.md +166 -0
  103. package/.docs/docs/workflows/control-flow.md +822 -0
  104. package/.docs/docs/workflows/error-handling.md +360 -0
  105. package/.docs/docs/workflows/human-in-the-loop.md +215 -0
  106. package/.docs/docs/workflows/overview.md +370 -0
  107. package/.docs/docs/workflows/snapshots.md +238 -0
  108. package/.docs/docs/workflows/suspend-and-resume.md +205 -0
  109. package/.docs/docs/workflows/time-travel.md +309 -0
  110. package/.docs/docs/workflows/workflow-state.md +181 -0
  111. package/.docs/docs/workspace/filesystem.md +164 -0
  112. package/.docs/docs/workspace/overview.md +239 -0
  113. package/.docs/docs/workspace/sandbox.md +63 -0
  114. package/.docs/docs/workspace/search.md +243 -0
  115. package/.docs/docs/workspace/skills.md +169 -0
  116. package/.docs/guides/agent-frameworks/ai-sdk.md +140 -0
  117. package/.docs/guides/build-your-ui/ai-sdk-ui.md +1499 -0
  118. package/.docs/guides/build-your-ui/assistant-ui.md +156 -0
  119. package/.docs/guides/build-your-ui/copilotkit.md +289 -0
  120. package/.docs/guides/deployment/amazon-ec2.md +130 -0
  121. package/.docs/guides/deployment/aws-lambda.md +248 -0
  122. package/.docs/guides/deployment/azure-app-services.md +114 -0
  123. package/.docs/guides/deployment/cloudflare.md +99 -0
  124. package/.docs/guides/deployment/digital-ocean.md +168 -0
  125. package/.docs/guides/deployment/inngest.md +682 -0
  126. package/.docs/guides/deployment/netlify.md +77 -0
  127. package/.docs/guides/deployment/vercel.md +101 -0
  128. package/.docs/guides/getting-started/astro.md +398 -0
  129. package/.docs/guides/getting-started/electron.md +504 -0
  130. package/.docs/guides/getting-started/express.md +251 -0
  131. package/.docs/guides/getting-started/hono.md +190 -0
  132. package/.docs/guides/getting-started/next-js.md +347 -0
  133. package/.docs/guides/getting-started/nuxt.md +497 -0
  134. package/.docs/guides/getting-started/quickstart.md +67 -0
  135. package/.docs/guides/getting-started/sveltekit.md +296 -0
  136. package/.docs/guides/getting-started/vite-react.md +425 -0
  137. package/.docs/guides/guide/ai-recruiter.md +226 -0
  138. package/.docs/guides/guide/chef-michel.md +211 -0
  139. package/.docs/guides/guide/code-review-bot.md +226 -0
  140. package/.docs/guides/guide/dev-assistant.md +307 -0
  141. package/.docs/guides/guide/docs-manager.md +238 -0
  142. package/.docs/guides/guide/github-actions-pr-description.md +236 -0
  143. package/.docs/guides/guide/notes-mcp-server.md +416 -0
  144. package/.docs/guides/guide/research-assistant.md +348 -0
  145. package/.docs/guides/guide/research-coordinator.md +416 -0
  146. package/.docs/guides/guide/stock-agent.md +132 -0
  147. package/.docs/guides/guide/web-search.md +320 -0
  148. package/.docs/guides/guide/whatsapp-chat-bot.md +405 -0
  149. package/.docs/guides/index.md +3 -0
  150. package/.docs/guides/migrations/agentnetwork.md +97 -0
  151. package/.docs/guides/migrations/ai-sdk-v4-to-v5.md +112 -0
  152. package/.docs/guides/migrations/network-to-supervisor.md +261 -0
  153. package/.docs/guides/migrations/upgrade-to-v1/agent.md +404 -0
  154. package/.docs/guides/migrations/upgrade-to-v1/cli.md +57 -0
  155. package/.docs/guides/migrations/upgrade-to-v1/client.md +337 -0
  156. package/.docs/guides/migrations/upgrade-to-v1/deployment.md +37 -0
  157. package/.docs/guides/migrations/upgrade-to-v1/evals.md +239 -0
  158. package/.docs/guides/migrations/upgrade-to-v1/mastra.md +143 -0
  159. package/.docs/guides/migrations/upgrade-to-v1/mcp.md +97 -0
  160. package/.docs/guides/migrations/upgrade-to-v1/memory.md +285 -0
  161. package/.docs/guides/migrations/upgrade-to-v1/overview.md +119 -0
  162. package/.docs/guides/migrations/upgrade-to-v1/processors.md +68 -0
  163. package/.docs/guides/migrations/upgrade-to-v1/rag.md +42 -0
  164. package/.docs/guides/migrations/upgrade-to-v1/storage.md +553 -0
  165. package/.docs/guides/migrations/upgrade-to-v1/tools.md +180 -0
  166. package/.docs/guides/migrations/upgrade-to-v1/tracing.md +412 -0
  167. package/.docs/guides/migrations/upgrade-to-v1/vectors.md +87 -0
  168. package/.docs/guides/migrations/upgrade-to-v1/voice.md +30 -0
  169. package/.docs/guides/migrations/upgrade-to-v1/workflows.md +341 -0
  170. package/.docs/guides/migrations/vnext-to-standard-apis.md +362 -0
  171. package/.docs/models/embeddings.md +161 -0
  172. package/.docs/models/gateways/azure-openai.md +128 -0
  173. package/.docs/models/gateways/custom-gateways.md +545 -0
  174. package/.docs/models/gateways/netlify.md +88 -0
  175. package/.docs/models/gateways/openrouter.md +219 -0
  176. package/.docs/models/gateways/vercel.md +225 -0
  177. package/.docs/models/gateways.md +14 -0
  178. package/.docs/models/index.md +286 -0
  179. package/.docs/models/providers/302ai.md +134 -0
  180. package/.docs/models/providers/abacus.md +125 -0
  181. package/.docs/models/providers/agentrouter.md +90 -0
  182. package/.docs/models/providers/aihubmix.md +107 -0
  183. package/.docs/models/providers/alibaba-cn.md +135 -0
  184. package/.docs/models/providers/alibaba.md +111 -0
  185. package/.docs/models/providers/amazon-bedrock.md +33 -0
  186. package/.docs/models/providers/anthropic.md +153 -0
  187. package/.docs/models/providers/azure.md +33 -0
  188. package/.docs/models/providers/bailing.md +72 -0
  189. package/.docs/models/providers/baseten.md +77 -0
  190. package/.docs/models/providers/berget.md +78 -0
  191. package/.docs/models/providers/cerebras.md +101 -0
  192. package/.docs/models/providers/chutes.md +136 -0
  193. package/.docs/models/providers/cloudflare-ai-gateway.md +33 -0
  194. package/.docs/models/providers/cloudflare-workers-ai.md +109 -0
  195. package/.docs/models/providers/cohere.md +33 -0
  196. package/.docs/models/providers/cortecs.md +91 -0
  197. package/.docs/models/providers/deepinfra.md +112 -0
  198. package/.docs/models/providers/deepseek.md +88 -0
  199. package/.docs/models/providers/fastrouter.md +84 -0
  200. package/.docs/models/providers/fireworks-ai.md +89 -0
  201. package/.docs/models/providers/firmware.md +85 -0
  202. package/.docs/models/providers/friendli.md +78 -0
  203. package/.docs/models/providers/github-models.md +125 -0
  204. package/.docs/models/providers/google-vertex.md +33 -0
  205. package/.docs/models/providers/google.md +159 -0
  206. package/.docs/models/providers/groq.md +107 -0
  207. package/.docs/models/providers/helicone.md +161 -0
  208. package/.docs/models/providers/huggingface.md +90 -0
  209. package/.docs/models/providers/iflowcn.md +84 -0
  210. package/.docs/models/providers/inception.md +72 -0
  211. package/.docs/models/providers/inference.md +79 -0
  212. package/.docs/models/providers/io-intelligence.md +87 -0
  213. package/.docs/models/providers/io-net.md +87 -0
  214. package/.docs/models/providers/jiekou.md +131 -0
  215. package/.docs/models/providers/kilo.md +333 -0
  216. package/.docs/models/providers/kimi-for-coding.md +100 -0
  217. package/.docs/models/providers/kuae-cloud-coding-plan.md +71 -0
  218. package/.docs/models/providers/llama.md +77 -0
  219. package/.docs/models/providers/lmstudio.md +73 -0
  220. package/.docs/models/providers/lucidquery.md +72 -0
  221. package/.docs/models/providers/minimax-cn-coding-plan.md +102 -0
  222. package/.docs/models/providers/minimax-cn.md +102 -0
  223. package/.docs/models/providers/minimax-coding-plan.md +102 -0
  224. package/.docs/models/providers/minimax.md +104 -0
  225. package/.docs/models/providers/mistral.md +124 -0
  226. package/.docs/models/providers/moark.md +72 -0
  227. package/.docs/models/providers/modelscope.md +77 -0
  228. package/.docs/models/providers/moonshotai-cn.md +76 -0
  229. package/.docs/models/providers/moonshotai.md +76 -0
  230. package/.docs/models/providers/morph.md +73 -0
  231. package/.docs/models/providers/nano-gpt.md +103 -0
  232. package/.docs/models/providers/nebius.md +116 -0
  233. package/.docs/models/providers/nova.md +72 -0
  234. package/.docs/models/providers/novita-ai.md +154 -0
  235. package/.docs/models/providers/nvidia.md +141 -0
  236. package/.docs/models/providers/ollama-cloud.md +103 -0
  237. package/.docs/models/providers/ollama.md +33 -0
  238. package/.docs/models/providers/openai.md +193 -0
  239. package/.docs/models/providers/opencode.md +100 -0
  240. package/.docs/models/providers/ovhcloud.md +83 -0
  241. package/.docs/models/providers/perplexity.md +100 -0
  242. package/.docs/models/providers/poe.md +183 -0
  243. package/.docs/models/providers/privatemode-ai.md +75 -0
  244. package/.docs/models/providers/requesty.md +90 -0
  245. package/.docs/models/providers/scaleway.md +84 -0
  246. package/.docs/models/providers/siliconflow-cn.md +138 -0
  247. package/.docs/models/providers/siliconflow.md +140 -0
  248. package/.docs/models/providers/stackit.md +78 -0
  249. package/.docs/models/providers/stepfun.md +73 -0
  250. package/.docs/models/providers/submodel.md +79 -0
  251. package/.docs/models/providers/synthetic.md +96 -0
  252. package/.docs/models/providers/togetherai.md +115 -0
  253. package/.docs/models/providers/upstage.md +73 -0
  254. package/.docs/models/providers/venice.md +95 -0
  255. package/.docs/models/providers/vivgrid.md +106 -0
  256. package/.docs/models/providers/vultr.md +75 -0
  257. package/.docs/models/providers/wandb.md +80 -0
  258. package/.docs/models/providers/xai.md +141 -0
  259. package/.docs/models/providers/xiaomi.md +71 -0
  260. package/.docs/models/providers/zai-coding-plan.md +80 -0
  261. package/.docs/models/providers/zai.md +79 -0
  262. package/.docs/models/providers/zenmux.md +161 -0
  263. package/.docs/models/providers/zhipuai-coding-plan.md +79 -0
  264. package/.docs/models/providers/zhipuai.md +79 -0
  265. package/.docs/models/providers.md +81 -0
  266. package/.docs/reference/agents/agent.md +141 -0
  267. package/.docs/reference/agents/generate.md +186 -0
  268. package/.docs/reference/agents/generateLegacy.md +173 -0
  269. package/.docs/reference/agents/getDefaultGenerateOptions.md +36 -0
  270. package/.docs/reference/agents/getDefaultOptions.md +34 -0
  271. package/.docs/reference/agents/getDefaultStreamOptions.md +36 -0
  272. package/.docs/reference/agents/getDescription.md +21 -0
  273. package/.docs/reference/agents/getInstructions.md +34 -0
  274. package/.docs/reference/agents/getLLM.md +37 -0
  275. package/.docs/reference/agents/getMemory.md +34 -0
  276. package/.docs/reference/agents/getModel.md +34 -0
  277. package/.docs/reference/agents/getTools.md +29 -0
  278. package/.docs/reference/agents/getVoice.md +34 -0
  279. package/.docs/reference/agents/listAgents.md +35 -0
  280. package/.docs/reference/agents/listScorers.md +34 -0
  281. package/.docs/reference/agents/listTools.md +34 -0
  282. package/.docs/reference/agents/listWorkflows.md +34 -0
  283. package/.docs/reference/agents/network.md +133 -0
  284. package/.docs/reference/ai-sdk/chat-route.md +82 -0
  285. package/.docs/reference/ai-sdk/handle-chat-stream.md +53 -0
  286. package/.docs/reference/ai-sdk/handle-network-stream.md +37 -0
  287. package/.docs/reference/ai-sdk/handle-workflow-stream.md +55 -0
  288. package/.docs/reference/ai-sdk/network-route.md +74 -0
  289. package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +231 -0
  290. package/.docs/reference/ai-sdk/to-ai-sdk-v4-messages.md +79 -0
  291. package/.docs/reference/ai-sdk/to-ai-sdk-v5-messages.md +76 -0
  292. package/.docs/reference/ai-sdk/with-mastra.md +59 -0
  293. package/.docs/reference/ai-sdk/workflow-route.md +79 -0
  294. package/.docs/reference/auth/auth0.md +73 -0
  295. package/.docs/reference/auth/better-auth.md +71 -0
  296. package/.docs/reference/auth/clerk.md +36 -0
  297. package/.docs/reference/auth/firebase.md +80 -0
  298. package/.docs/reference/auth/jwt.md +26 -0
  299. package/.docs/reference/auth/supabase.md +33 -0
  300. package/.docs/reference/auth/workos.md +84 -0
  301. package/.docs/reference/cli/create-mastra.md +137 -0
  302. package/.docs/reference/cli/mastra.md +336 -0
  303. package/.docs/reference/client-js/agents.md +437 -0
  304. package/.docs/reference/client-js/error-handling.md +16 -0
  305. package/.docs/reference/client-js/logs.md +24 -0
  306. package/.docs/reference/client-js/mastra-client.md +63 -0
  307. package/.docs/reference/client-js/memory.md +221 -0
  308. package/.docs/reference/client-js/observability.md +72 -0
  309. package/.docs/reference/client-js/telemetry.md +20 -0
  310. package/.docs/reference/client-js/tools.md +44 -0
  311. package/.docs/reference/client-js/vectors.md +79 -0
  312. package/.docs/reference/client-js/workflows.md +199 -0
  313. package/.docs/reference/configuration.md +752 -0
  314. package/.docs/reference/core/addGateway.md +42 -0
  315. package/.docs/reference/core/getAgent.md +21 -0
  316. package/.docs/reference/core/getAgentById.md +21 -0
  317. package/.docs/reference/core/getDeployer.md +22 -0
  318. package/.docs/reference/core/getGateway.md +38 -0
  319. package/.docs/reference/core/getGatewayById.md +41 -0
  320. package/.docs/reference/core/getLogger.md +22 -0
  321. package/.docs/reference/core/getMCPServer.md +47 -0
  322. package/.docs/reference/core/getMCPServerById.md +55 -0
  323. package/.docs/reference/core/getMemory.md +50 -0
  324. package/.docs/reference/core/getScorer.md +54 -0
  325. package/.docs/reference/core/getScorerById.md +54 -0
  326. package/.docs/reference/core/getServer.md +22 -0
  327. package/.docs/reference/core/getStorage.md +22 -0
  328. package/.docs/reference/core/getStoredAgentById.md +89 -0
  329. package/.docs/reference/core/getTelemetry.md +22 -0
  330. package/.docs/reference/core/getVector.md +22 -0
  331. package/.docs/reference/core/getWorkflow.md +42 -0
  332. package/.docs/reference/core/listAgents.md +21 -0
  333. package/.docs/reference/core/listGateways.md +40 -0
  334. package/.docs/reference/core/listLogs.md +38 -0
  335. package/.docs/reference/core/listLogsByRunId.md +36 -0
  336. package/.docs/reference/core/listMCPServers.md +55 -0
  337. package/.docs/reference/core/listMemory.md +56 -0
  338. package/.docs/reference/core/listScorers.md +29 -0
  339. package/.docs/reference/core/listStoredAgents.md +93 -0
  340. package/.docs/reference/core/listVectors.md +22 -0
  341. package/.docs/reference/core/listWorkflows.md +21 -0
  342. package/.docs/reference/core/mastra-class.md +66 -0
  343. package/.docs/reference/core/mastra-model-gateway.md +153 -0
  344. package/.docs/reference/core/setLogger.md +26 -0
  345. package/.docs/reference/core/setStorage.md +27 -0
  346. package/.docs/reference/datasets/addItem.md +37 -0
  347. package/.docs/reference/datasets/addItems.md +35 -0
  348. package/.docs/reference/datasets/compareExperiments.md +52 -0
  349. package/.docs/reference/datasets/create.md +51 -0
  350. package/.docs/reference/datasets/dataset.md +82 -0
  351. package/.docs/reference/datasets/datasets-manager.md +94 -0
  352. package/.docs/reference/datasets/delete.md +25 -0
  353. package/.docs/reference/datasets/deleteExperiment.md +27 -0
  354. package/.docs/reference/datasets/deleteItem.md +27 -0
  355. package/.docs/reference/datasets/deleteItems.md +29 -0
  356. package/.docs/reference/datasets/get.md +31 -0
  357. package/.docs/reference/datasets/getDetails.md +47 -0
  358. package/.docs/reference/datasets/getExperiment.md +30 -0
  359. package/.docs/reference/datasets/getItem.md +33 -0
  360. package/.docs/reference/datasets/getItemHistory.md +31 -0
  361. package/.docs/reference/datasets/list.md +31 -0
  362. package/.docs/reference/datasets/listExperimentResults.md +39 -0
  363. package/.docs/reference/datasets/listExperiments.md +33 -0
  364. package/.docs/reference/datasets/listItems.md +46 -0
  365. package/.docs/reference/datasets/listVersions.md +33 -0
  366. package/.docs/reference/datasets/startExperiment.md +62 -0
  367. package/.docs/reference/datasets/startExperimentAsync.md +43 -0
  368. package/.docs/reference/datasets/update.md +48 -0
  369. package/.docs/reference/datasets/updateItem.md +38 -0
  370. package/.docs/reference/deployer/cloudflare.md +79 -0
  371. package/.docs/reference/deployer/netlify.md +80 -0
  372. package/.docs/reference/deployer/vercel.md +91 -0
  373. package/.docs/reference/deployer.md +100 -0
  374. package/.docs/reference/evals/answer-relevancy.md +105 -0
  375. package/.docs/reference/evals/answer-similarity.md +99 -0
  376. package/.docs/reference/evals/bias.md +120 -0
  377. package/.docs/reference/evals/completeness.md +136 -0
  378. package/.docs/reference/evals/content-similarity.md +101 -0
  379. package/.docs/reference/evals/context-precision.md +196 -0
  380. package/.docs/reference/evals/context-relevance.md +531 -0
  381. package/.docs/reference/evals/create-scorer.md +270 -0
  382. package/.docs/reference/evals/faithfulness.md +114 -0
  383. package/.docs/reference/evals/hallucination.md +213 -0
  384. package/.docs/reference/evals/keyword-coverage.md +128 -0
  385. package/.docs/reference/evals/mastra-scorer.md +123 -0
  386. package/.docs/reference/evals/noise-sensitivity.md +675 -0
  387. package/.docs/reference/evals/prompt-alignment.md +614 -0
  388. package/.docs/reference/evals/run-evals.md +179 -0
  389. package/.docs/reference/evals/scorer-utils.md +326 -0
  390. package/.docs/reference/evals/textual-difference.md +113 -0
  391. package/.docs/reference/evals/tone-consistency.md +119 -0
  392. package/.docs/reference/evals/tool-call-accuracy.md +533 -0
  393. package/.docs/reference/evals/toxicity.md +123 -0
  394. package/.docs/reference/harness/harness-class.md +708 -0
  395. package/.docs/reference/index.md +277 -0
  396. package/.docs/reference/logging/pino-logger.md +117 -0
  397. package/.docs/reference/mastra-code/createMastraCode.md +108 -0
  398. package/.docs/reference/memory/clone-utilities.md +199 -0
  399. package/.docs/reference/memory/cloneThread.md +130 -0
  400. package/.docs/reference/memory/createThread.md +68 -0
  401. package/.docs/reference/memory/deleteMessages.md +38 -0
  402. package/.docs/reference/memory/getThreadById.md +24 -0
  403. package/.docs/reference/memory/listThreads.md +145 -0
  404. package/.docs/reference/memory/memory-class.md +147 -0
  405. package/.docs/reference/memory/observational-memory.md +565 -0
  406. package/.docs/reference/memory/recall.md +91 -0
  407. package/.docs/reference/observability/tracing/bridges/otel.md +131 -0
  408. package/.docs/reference/observability/tracing/configuration.md +178 -0
  409. package/.docs/reference/observability/tracing/exporters/arize.md +141 -0
  410. package/.docs/reference/observability/tracing/exporters/braintrust.md +93 -0
  411. package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +163 -0
  412. package/.docs/reference/observability/tracing/exporters/console-exporter.md +138 -0
  413. package/.docs/reference/observability/tracing/exporters/datadog.md +116 -0
  414. package/.docs/reference/observability/tracing/exporters/default-exporter.md +174 -0
  415. package/.docs/reference/observability/tracing/exporters/laminar.md +78 -0
  416. package/.docs/reference/observability/tracing/exporters/langfuse.md +134 -0
  417. package/.docs/reference/observability/tracing/exporters/langsmith.md +108 -0
  418. package/.docs/reference/observability/tracing/exporters/otel.md +199 -0
  419. package/.docs/reference/observability/tracing/exporters/posthog.md +92 -0
  420. package/.docs/reference/observability/tracing/exporters/sentry.md +184 -0
  421. package/.docs/reference/observability/tracing/instances.md +107 -0
  422. package/.docs/reference/observability/tracing/interfaces.md +743 -0
  423. package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +144 -0
  424. package/.docs/reference/observability/tracing/spans.md +224 -0
  425. package/.docs/reference/processors/batch-parts-processor.md +61 -0
  426. package/.docs/reference/processors/language-detector.md +82 -0
  427. package/.docs/reference/processors/message-history-processor.md +85 -0
  428. package/.docs/reference/processors/moderation-processor.md +104 -0
  429. package/.docs/reference/processors/pii-detector.md +108 -0
  430. package/.docs/reference/processors/processor-interface.md +521 -0
  431. package/.docs/reference/processors/prompt-injection-detector.md +72 -0
  432. package/.docs/reference/processors/semantic-recall-processor.md +117 -0
  433. package/.docs/reference/processors/system-prompt-scrubber.md +80 -0
  434. package/.docs/reference/processors/token-limiter-processor.md +115 -0
  435. package/.docs/reference/processors/tool-call-filter.md +85 -0
  436. package/.docs/reference/processors/tool-search-processor.md +111 -0
  437. package/.docs/reference/processors/unicode-normalizer.md +62 -0
  438. package/.docs/reference/processors/working-memory-processor.md +152 -0
  439. package/.docs/reference/rag/chunk.md +221 -0
  440. package/.docs/reference/rag/database-config.md +261 -0
  441. package/.docs/reference/rag/document.md +114 -0
  442. package/.docs/reference/rag/embeddings.md +92 -0
  443. package/.docs/reference/rag/extract-params.md +168 -0
  444. package/.docs/reference/rag/graph-rag.md +111 -0
  445. package/.docs/reference/rag/metadata-filters.md +216 -0
  446. package/.docs/reference/rag/rerank.md +75 -0
  447. package/.docs/reference/rag/rerankWithScorer.md +80 -0
  448. package/.docs/reference/server/create-route.md +262 -0
  449. package/.docs/reference/server/express-adapter.md +176 -0
  450. package/.docs/reference/server/fastify-adapter.md +90 -0
  451. package/.docs/reference/server/hono-adapter.md +162 -0
  452. package/.docs/reference/server/koa-adapter.md +127 -0
  453. package/.docs/reference/server/mastra-server.md +298 -0
  454. package/.docs/reference/server/register-api-route.md +249 -0
  455. package/.docs/reference/server/routes.md +306 -0
  456. package/.docs/reference/storage/cloudflare-d1.md +218 -0
  457. package/.docs/reference/storage/cloudflare.md +88 -0
  458. package/.docs/reference/storage/composite.md +235 -0
  459. package/.docs/reference/storage/convex.md +161 -0
  460. package/.docs/reference/storage/dynamodb.md +282 -0
  461. package/.docs/reference/storage/lance.md +131 -0
  462. package/.docs/reference/storage/libsql.md +135 -0
  463. package/.docs/reference/storage/mongodb.md +262 -0
  464. package/.docs/reference/storage/mssql.md +157 -0
  465. package/.docs/reference/storage/overview.md +121 -0
  466. package/.docs/reference/storage/postgresql.md +526 -0
  467. package/.docs/reference/storage/upstash.md +160 -0
  468. package/.docs/reference/streaming/ChunkType.md +292 -0
  469. package/.docs/reference/streaming/agents/MastraModelOutput.md +182 -0
  470. package/.docs/reference/streaming/agents/stream.md +221 -0
  471. package/.docs/reference/streaming/agents/streamLegacy.md +142 -0
  472. package/.docs/reference/streaming/workflows/observeStream.md +42 -0
  473. package/.docs/reference/streaming/workflows/resumeStream.md +61 -0
  474. package/.docs/reference/streaming/workflows/stream.md +88 -0
  475. package/.docs/reference/streaming/workflows/timeTravelStream.md +142 -0
  476. package/.docs/reference/templates/overview.md +194 -0
  477. package/.docs/reference/tools/create-tool.md +237 -0
  478. package/.docs/reference/tools/document-chunker-tool.md +89 -0
  479. package/.docs/reference/tools/graph-rag-tool.md +182 -0
  480. package/.docs/reference/tools/mcp-client.md +954 -0
  481. package/.docs/reference/tools/mcp-server.md +1271 -0
  482. package/.docs/reference/tools/vector-query-tool.md +459 -0
  483. package/.docs/reference/vectors/astra.md +121 -0
  484. package/.docs/reference/vectors/chroma.md +264 -0
  485. package/.docs/reference/vectors/convex.md +300 -0
  486. package/.docs/reference/vectors/couchbase.md +226 -0
  487. package/.docs/reference/vectors/duckdb.md +318 -0
  488. package/.docs/reference/vectors/elasticsearch.md +189 -0
  489. package/.docs/reference/vectors/lance.md +220 -0
  490. package/.docs/reference/vectors/libsql.md +305 -0
  491. package/.docs/reference/vectors/mongodb.md +295 -0
  492. package/.docs/reference/vectors/opensearch.md +99 -0
  493. package/.docs/reference/vectors/pg.md +408 -0
  494. package/.docs/reference/vectors/pinecone.md +168 -0
  495. package/.docs/reference/vectors/qdrant.md +222 -0
  496. package/.docs/reference/vectors/s3vectors.md +277 -0
  497. package/.docs/reference/vectors/turbopuffer.md +157 -0
  498. package/.docs/reference/vectors/upstash.md +294 -0
  499. package/.docs/reference/vectors/vectorize.md +147 -0
  500. package/.docs/reference/voice/azure.md +148 -0
  501. package/.docs/reference/voice/cloudflare.md +83 -0
  502. package/.docs/reference/voice/composite-voice.md +121 -0
  503. package/.docs/reference/voice/deepgram.md +79 -0
  504. package/.docs/reference/voice/elevenlabs.md +98 -0
  505. package/.docs/reference/voice/google-gemini-live.md +378 -0
  506. package/.docs/reference/voice/google.md +228 -0
  507. package/.docs/reference/voice/mastra-voice.md +311 -0
  508. package/.docs/reference/voice/murf.md +122 -0
  509. package/.docs/reference/voice/openai-realtime.md +203 -0
  510. package/.docs/reference/voice/openai.md +88 -0
  511. package/.docs/reference/voice/playai.md +80 -0
  512. package/.docs/reference/voice/sarvam.md +126 -0
  513. package/.docs/reference/voice/speechify.md +75 -0
  514. package/.docs/reference/voice/voice.addInstructions.md +55 -0
  515. package/.docs/reference/voice/voice.addTools.md +67 -0
  516. package/.docs/reference/voice/voice.answer.md +54 -0
  517. package/.docs/reference/voice/voice.close.md +51 -0
  518. package/.docs/reference/voice/voice.connect.md +94 -0
  519. package/.docs/reference/voice/voice.events.md +37 -0
  520. package/.docs/reference/voice/voice.getSpeakers.md +129 -0
  521. package/.docs/reference/voice/voice.listen.md +164 -0
  522. package/.docs/reference/voice/voice.off.md +54 -0
  523. package/.docs/reference/voice/voice.on.md +111 -0
  524. package/.docs/reference/voice/voice.send.md +65 -0
  525. package/.docs/reference/voice/voice.speak.md +157 -0
  526. package/.docs/reference/voice/voice.updateConfig.md +60 -0
  527. package/.docs/reference/workflows/run-methods/cancel.md +86 -0
  528. package/.docs/reference/workflows/run-methods/restart.md +33 -0
  529. package/.docs/reference/workflows/run-methods/resume.md +59 -0
  530. package/.docs/reference/workflows/run-methods/start.md +58 -0
  531. package/.docs/reference/workflows/run-methods/startAsync.md +67 -0
  532. package/.docs/reference/workflows/run-methods/timeTravel.md +142 -0
  533. package/.docs/reference/workflows/run.md +59 -0
  534. package/.docs/reference/workflows/step.md +119 -0
  535. package/.docs/reference/workflows/workflow-methods/branch.md +25 -0
  536. package/.docs/reference/workflows/workflow-methods/commit.md +17 -0
  537. package/.docs/reference/workflows/workflow-methods/create-run.md +63 -0
  538. package/.docs/reference/workflows/workflow-methods/dountil.md +25 -0
  539. package/.docs/reference/workflows/workflow-methods/dowhile.md +25 -0
  540. package/.docs/reference/workflows/workflow-methods/foreach.md +118 -0
  541. package/.docs/reference/workflows/workflow-methods/map.md +93 -0
  542. package/.docs/reference/workflows/workflow-methods/parallel.md +21 -0
  543. package/.docs/reference/workflows/workflow-methods/sleep.md +35 -0
  544. package/.docs/reference/workflows/workflow-methods/sleepUntil.md +35 -0
  545. package/.docs/reference/workflows/workflow-methods/then.md +21 -0
  546. package/.docs/reference/workflows/workflow.md +157 -0
  547. package/.docs/reference/workspace/e2b-sandbox.md +289 -0
  548. package/.docs/reference/workspace/filesystem.md +255 -0
  549. package/.docs/reference/workspace/gcs-filesystem.md +174 -0
  550. package/.docs/reference/workspace/local-filesystem.md +343 -0
  551. package/.docs/reference/workspace/local-sandbox.md +301 -0
  552. package/.docs/reference/workspace/s3-filesystem.md +175 -0
  553. package/.docs/reference/workspace/sandbox.md +87 -0
  554. package/.docs/reference/workspace/workspace-class.md +244 -0
  555. package/CHANGELOG.md +8 -0
  556. package/package.json +5 -5
@@ -0,0 +1,179 @@
1
+ # runEvals
2
+
3
+ The `runEvals` function enables batch evaluation of agents and workflows by running multiple test cases against scorers concurrently. This is essential for systematic testing, performance analysis, and validation of AI systems.
4
+
5
+ ## Usage Example
6
+
7
+ ```typescript
8
+ import { runEvals } from '@mastra/core/evals'
9
+ import { myAgent } from './agents/my-agent'
10
+ import { myScorer1, myScorer2 } from './scorers'
11
+
12
+ const result = await runEvals({
13
+ target: myAgent,
14
+ data: [
15
+ { input: 'What is machine learning?' },
16
+ { input: 'Explain neural networks' },
17
+ { input: 'How does AI work?' },
18
+ ],
19
+ scorers: [myScorer1, myScorer2],
20
+ targetOptions: { maxSteps: 5 },
21
+ concurrency: 2,
22
+ onItemComplete: ({ item, targetResult, scorerResults }) => {
23
+ console.log(`Completed: ${item.input}`)
24
+ console.log(`Scores:`, scorerResults)
25
+ },
26
+ })
27
+
28
+ console.log(`Average scores:`, result.scores)
29
+ console.log(`Processed ${result.summary.totalItems} items`)
30
+ ```
31
+
32
+ ## Parameters
33
+
34
+ **target:** (`Agent | Workflow`): The agent or workflow to evaluate.
35
+
36
+ **data:** (`RunEvalsDataItem[]`): Array of test cases with input data and optional ground truth.
37
+
38
+ **scorers:** (`MastraScorer[] | WorkflowScorerConfig`): Array of scorers for agents, or configuration object for workflows specifying scorers for the workflow and individual steps.
39
+
40
+ **targetOptions?:** (`AgentExecutionOptions | WorkflowRunOptions`): Options forwarded to the target during execution. For agents: options passed to agent.generate() (e.g. maxSteps, modelSettings, instructions). For workflows: options passed to run.start() (e.g. perStep, outputOptions, initialState).
41
+
42
+ **concurrency?:** (`number`): Number of test cases to run concurrently. (Default: `1`)
43
+
44
+ **onItemComplete?:** (`function`): Callback function called after each test case completes. Receives item, target result, and scorer results.
45
+
46
+ ## Data Item Structure
47
+
48
+ **input:** (`string | string[] | CoreMessage[] | any`): Input data for the target. For agents: messages or strings. For workflows: workflow input data.
49
+
50
+ **groundTruth?:** (`any`): Expected or reference output for comparison during scoring.
51
+
52
+ **requestContext?:** (`RequestContext`): Request context to pass to the target during execution.
53
+
54
+ **tracingContext?:** (`TracingContext`): Tracing context for observability and debugging.
55
+
56
+ **startOptions?:** (`WorkflowRunOptions`): Per-item workflow run options (e.g. initialState, perStep, outputOptions). Merged on top of targetOptions, so per-item values take precedence. Only applicable when the target is a workflow.
57
+
58
+ ## Workflow Scorer Configuration
59
+
60
+ For workflows, you can specify scorers at different levels using `WorkflowScorerConfig`:
61
+
62
+ **workflow?:** (`MastraScorer[]`): Array of scorers to evaluate the entire workflow output.
63
+
64
+ **steps?:** (`Record<string, MastraScorer[]>`): Object mapping step IDs to arrays of scorers for evaluating individual step outputs.
65
+
66
+ ## Returns
67
+
68
+ **scores:** (`Record<string, any>`): Average scores across all test cases, organized by scorer name.
69
+
70
+ **summary:** (`object`): Summary information about the experiment execution.
71
+
72
+ **summary.totalItems:** (`number`): Total number of test cases processed.
73
+
74
+ ## Examples
75
+
76
+ ### Agent Evaluation
77
+
78
+ ```typescript
79
+ import { createScorer, runEvals } from '@mastra/core/evals'
80
+
81
+ const myScorer = createScorer({
82
+ id: 'my-scorer',
83
+ description: "Check if Agent's response contains ground truth",
84
+ type: 'agent',
85
+ }).generateScore(({ run }) => {
86
+ const response = run.output[0]?.content || ''
87
+ const expectedResponse = run.groundTruth
88
+ return response.includes(expectedResponse) ? 1 : 0
89
+ })
90
+
91
+ const result = await runEvals({
92
+ target: chatAgent,
93
+ data: [
94
+ {
95
+ input: 'What is AI?',
96
+ groundTruth: 'AI is a field of computer science that creates intelligent machines.',
97
+ },
98
+ {
99
+ input: 'How does machine learning work?',
100
+ groundTruth: 'Machine learning uses algorithms to learn patterns from data.',
101
+ },
102
+ ],
103
+ scorers: [myScorer],
104
+ concurrency: 3,
105
+ })
106
+ ```
107
+
108
+ ### Agent with targetOptions
109
+
110
+ Pass execution options like `maxSteps` or `modelSettings` to customize agent behavior during evaluation:
111
+
112
+ ```typescript
113
+ const result = await runEvals({
114
+ target: chatAgent,
115
+ data: [{ input: 'Summarize this article' }, { input: 'Translate to French' }],
116
+ scorers: [relevancyScorer],
117
+ targetOptions: {
118
+ maxSteps: 5,
119
+ modelSettings: { temperature: 0 },
120
+ },
121
+ })
122
+ ```
123
+
124
+ ### Workflow Evaluation
125
+
126
+ ```typescript
127
+ const workflowResult = await runEvals({
128
+ target: myWorkflow,
129
+ data: [
130
+ { input: { query: 'Process this data', priority: 'high' } },
131
+ { input: { query: 'Another task', priority: 'low' } },
132
+ ],
133
+ scorers: {
134
+ workflow: [outputQualityScorer],
135
+ steps: {
136
+ 'validation-step': [validationScorer],
137
+ 'processing-step': [processingScorer],
138
+ },
139
+ },
140
+ onItemComplete: ({ item, targetResult, scorerResults }) => {
141
+ console.log(`Workflow completed for: ${item.input.query}`)
142
+ if (scorerResults.workflow) {
143
+ console.log('Workflow scores:', scorerResults.workflow)
144
+ }
145
+ if (scorerResults.steps) {
146
+ console.log('Step scores:', scorerResults.steps)
147
+ }
148
+ },
149
+ })
150
+ ```
151
+
152
+ ### Workflow with per-item startOptions
153
+
154
+ Use `startOptions` on individual data items to customize each workflow run. Per-item values take precedence over `targetOptions`:
155
+
156
+ ```typescript
157
+ const result = await runEvals({
158
+ target: myWorkflow,
159
+ data: [
160
+ {
161
+ input: { query: 'hello' },
162
+ startOptions: { initialState: { counter: 1 } },
163
+ },
164
+ {
165
+ input: { query: 'world' },
166
+ startOptions: { initialState: { counter: 2 } },
167
+ },
168
+ ],
169
+ scorers: [outputQualityScorer],
170
+ targetOptions: { perStep: true },
171
+ })
172
+ ```
173
+
174
+ ## Related
175
+
176
+ - [createScorer()](https://mastra.ai/reference/evals/create-scorer) - Create custom scorers for experiments
177
+ - [MastraScorer](https://mastra.ai/reference/evals/mastra-scorer) - Learn about scorer structure and methods
178
+ - [Custom Scorers](https://mastra.ai/docs/evals/custom-scorers) - Guide to building evaluation logic
179
+ - [Scorers Overview](https://mastra.ai/docs/evals/overview) - Understanding scorer concepts
@@ -0,0 +1,326 @@
1
+ # Scorer Utils
2
+
3
+ Mastra provides utility functions to help extract and process data from scorer run inputs and outputs. These utilities are particularly useful in the `preprocess` step of custom scorers.
4
+
5
+ ## Import
6
+
7
+ ```typescript
8
+ import {
9
+ getAssistantMessageFromRunOutput,
10
+ getReasoningFromRunOutput,
11
+ getUserMessageFromRunInput,
12
+ getSystemMessagesFromRunInput,
13
+ getCombinedSystemPrompt,
14
+ extractToolCalls,
15
+ extractInputMessages,
16
+ extractAgentResponseMessages,
17
+ } from '@mastra/evals/scorers/utils'
18
+ ```
19
+
20
+ ## Message Extraction
21
+
22
+ ### getAssistantMessageFromRunOutput
23
+
24
+ Extracts the text content from the first assistant message in the run output.
25
+
26
+ ```typescript
27
+ const scorer = createScorer({
28
+ id: 'my-scorer',
29
+ description: 'My scorer',
30
+ type: 'agent',
31
+ })
32
+ .preprocess(({ run }) => {
33
+ const response = getAssistantMessageFromRunOutput(run.output)
34
+ return { response }
35
+ })
36
+ .generateScore(({ results }) => {
37
+ return results.preprocessStepResult?.response ? 1 : 0
38
+ })
39
+ ```
40
+
41
+ **output?:** (`ScorerRunOutputForAgent`): The scorer run output (array of MastraDBMessage)
42
+
43
+ **Returns:** `string | undefined` - The assistant message text, or undefined if no assistant message is found.
44
+
45
+ ### getUserMessageFromRunInput
46
+
47
+ Extracts the text content from the first user message in the run input.
48
+
49
+ ```typescript
50
+ .preprocess(({ run }) => {
51
+ const userMessage = getUserMessageFromRunInput(run.input);
52
+ return { userMessage };
53
+ })
54
+ ```
55
+
56
+ **input?:** (`ScorerRunInputForAgent`): The scorer run input containing input messages
57
+
58
+ **Returns:** `string | undefined` - The user message text, or undefined if no user message is found.
59
+
60
+ ### extractInputMessages
61
+
62
+ Extracts text content from all input messages as an array.
63
+
64
+ ```typescript
65
+ .preprocess(({ run }) => {
66
+ const allUserMessages = extractInputMessages(run.input);
67
+ return { conversationHistory: allUserMessages.join("\n") };
68
+ })
69
+ ```
70
+
71
+ **Returns:** `string[]` - Array of text strings from each input message.
72
+
73
+ ### extractAgentResponseMessages
74
+
75
+ Extracts text content from all assistant response messages as an array.
76
+
77
+ ```typescript
78
+ .preprocess(({ run }) => {
79
+ const allResponses = extractAgentResponseMessages(run.output);
80
+ return { allResponses };
81
+ })
82
+ ```
83
+
84
+ **Returns:** `string[]` - Array of text strings from each assistant message.
85
+
86
+ ## Reasoning Extraction
87
+
88
+ ### getReasoningFromRunOutput
89
+
90
+ Extracts reasoning text from the run output. This is particularly useful when evaluating responses from reasoning models like `deepseek-reasoner` that produce chain-of-thought reasoning.
91
+
92
+ Reasoning can be stored in two places:
93
+
94
+ 1. `content.reasoning` - a string field on the message content
95
+ 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`
96
+
97
+ ```typescript
98
+ import {
99
+ getReasoningFromRunOutput,
100
+ getAssistantMessageFromRunOutput,
101
+ } from '@mastra/evals/scorers/utils'
102
+
103
+ const reasoningQualityScorer = createScorer({
104
+ id: 'reasoning-quality',
105
+ name: 'Reasoning Quality',
106
+ description: 'Evaluates the quality of model reasoning',
107
+ type: 'agent',
108
+ })
109
+ .preprocess(({ run }) => {
110
+ const reasoning = getReasoningFromRunOutput(run.output)
111
+ const response = getAssistantMessageFromRunOutput(run.output)
112
+ return { reasoning, response }
113
+ })
114
+ .analyze(({ results }) => {
115
+ const { reasoning } = results.preprocessStepResult || {}
116
+ return {
117
+ hasReasoning: !!reasoning,
118
+ reasoningLength: reasoning?.length || 0,
119
+ hasStepByStep: reasoning?.includes('step') || false,
120
+ }
121
+ })
122
+ .generateScore(({ results }) => {
123
+ const { hasReasoning, reasoningLength } = results.analyzeStepResult || {}
124
+ if (!hasReasoning) return 0
125
+ // Score based on reasoning length (normalized to 0-1)
126
+ return Math.min(reasoningLength / 500, 1)
127
+ })
128
+ .generateReason(({ results, score }) => {
129
+ const { hasReasoning, reasoningLength } = results.analyzeStepResult || {}
130
+ if (!hasReasoning) {
131
+ return 'No reasoning was provided by the model.'
132
+ }
133
+ return `Model provided ${reasoningLength} characters of reasoning. Score: ${score}`
134
+ })
135
+ ```
136
+
137
+ **output?:** (`ScorerRunOutputForAgent`): The scorer run output (array of MastraDBMessage)
138
+
139
+ **Returns:** `string | undefined` - The reasoning text, or undefined if no reasoning is present.
140
+
141
+ ## System Message Extraction
142
+
143
+ ### getSystemMessagesFromRunInput
144
+
145
+ Extracts all system messages from the run input, including both standard system messages and tagged system messages (specialized prompts like memory instructions).
146
+
147
+ ```typescript
148
+ .preprocess(({ run }) => {
149
+ const systemMessages = getSystemMessagesFromRunInput(run.input);
150
+ return {
151
+ systemPromptCount: systemMessages.length,
152
+ systemPrompts: systemMessages
153
+ };
154
+ })
155
+ ```
156
+
157
+ **Returns:** `string[]` - Array of system message strings.
158
+
159
+ ### getCombinedSystemPrompt
160
+
161
+ Combines all system messages into a single prompt string, joined with double newlines.
162
+
163
+ ```typescript
164
+ .preprocess(({ run }) => {
165
+ const fullSystemPrompt = getCombinedSystemPrompt(run.input);
166
+ return { fullSystemPrompt };
167
+ })
168
+ ```
169
+
170
+ **Returns:** `string` - Combined system prompt string.
171
+
172
+ ## Tool Call Extraction
173
+
174
+ ### extractToolCalls
175
+
176
+ Extracts information about all tool calls from the run output, including tool names, call IDs, and their positions in the message array.
177
+
178
+ ```typescript
179
+ const toolUsageScorer = createScorer({
180
+ id: 'tool-usage',
181
+ description: 'Evaluates tool usage patterns',
182
+ type: 'agent',
183
+ })
184
+ .preprocess(({ run }) => {
185
+ const { tools, toolCallInfos } = extractToolCalls(run.output)
186
+ return {
187
+ toolsUsed: tools,
188
+ toolCount: tools.length,
189
+ toolDetails: toolCallInfos,
190
+ }
191
+ })
192
+ .generateScore(({ results }) => {
193
+ const { toolCount } = results.preprocessStepResult || {}
194
+ // Score based on appropriate tool usage
195
+ return toolCount > 0 ? 1 : 0
196
+ })
197
+ ```
198
+
199
+ **Returns:**
200
+
201
+ ```typescript
202
+ {
203
+ tools: string[]; // Array of tool names
204
+ toolCallInfos: ToolCallInfo[]; // Detailed tool call information
205
+ }
206
+ ```
207
+
208
+ Where `ToolCallInfo` is:
209
+
210
+ ```typescript
211
+ type ToolCallInfo = {
212
+ toolName: string // Name of the tool
213
+ toolCallId: string // Unique call identifier
214
+ messageIndex: number // Index in the output array
215
+ invocationIndex: number // Index within message's tool invocations
216
+ }
217
+ ```
218
+
219
+ ## Test Utilities
220
+
221
+ These utilities help create test data for scorer development.
222
+
223
+ ### createTestMessage
224
+
225
+ Creates a `MastraDBMessage` object for testing purposes.
226
+
227
+ ```typescript
228
+ import { createTestMessage } from '@mastra/evals/scorers/utils'
229
+
230
+ const userMessage = createTestMessage({
231
+ content: 'What is the weather?',
232
+ role: 'user',
233
+ })
234
+
235
+ const assistantMessage = createTestMessage({
236
+ content: 'The weather is sunny.',
237
+ role: 'assistant',
238
+ toolInvocations: [
239
+ {
240
+ toolCallId: 'call-1',
241
+ toolName: 'weatherTool',
242
+ args: { location: 'London' },
243
+ result: { temp: 20 },
244
+ state: 'result',
245
+ },
246
+ ],
247
+ })
248
+ ```
249
+
250
+ ### createAgentTestRun
251
+
252
+ Creates a complete test run object for testing scorers.
253
+
254
+ ```typescript
255
+ import { createAgentTestRun, createTestMessage } from '@mastra/evals/scorers/utils'
256
+
257
+ const testRun = createAgentTestRun({
258
+ inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],
259
+ output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],
260
+ })
261
+
262
+ // Run your scorer with the test data
263
+ const result = await myScorer.run({
264
+ input: testRun.input,
265
+ output: testRun.output,
266
+ })
267
+ ```
268
+
269
+ ## Complete Example
270
+
271
+ Here's a complete example showing how to use multiple utilities together:
272
+
273
+ ```typescript
274
+ import { createScorer } from '@mastra/core/evals'
275
+ import {
276
+ getAssistantMessageFromRunOutput,
277
+ getReasoningFromRunOutput,
278
+ getUserMessageFromRunInput,
279
+ getCombinedSystemPrompt,
280
+ extractToolCalls,
281
+ } from '@mastra/evals/scorers/utils'
282
+
283
+ const comprehensiveScorer = createScorer({
284
+ id: 'comprehensive-analysis',
285
+ name: 'Comprehensive Analysis',
286
+ description: 'Analyzes all aspects of an agent response',
287
+ type: 'agent',
288
+ })
289
+ .preprocess(({ run }) => {
290
+ // Extract all relevant data
291
+ const userMessage = getUserMessageFromRunInput(run.input)
292
+ const response = getAssistantMessageFromRunOutput(run.output)
293
+ const reasoning = getReasoningFromRunOutput(run.output)
294
+ const systemPrompt = getCombinedSystemPrompt(run.input)
295
+ const { tools, toolCallInfos } = extractToolCalls(run.output)
296
+
297
+ return {
298
+ userMessage,
299
+ response,
300
+ reasoning,
301
+ systemPrompt,
302
+ toolsUsed: tools,
303
+ toolCount: tools.length,
304
+ }
305
+ })
306
+ .generateScore(({ results }) => {
307
+ const { response, reasoning, toolCount } = results.preprocessStepResult || {}
308
+
309
+ let score = 0
310
+ if (response && response.length > 0) score += 0.4
311
+ if (reasoning) score += 0.3
312
+ if (toolCount > 0) score += 0.3
313
+
314
+ return score
315
+ })
316
+ .generateReason(({ results, score }) => {
317
+ const { response, reasoning, toolCount } = results.preprocessStepResult || {}
318
+
319
+ const parts = []
320
+ if (response) parts.push('provided a response')
321
+ if (reasoning) parts.push('included reasoning')
322
+ if (toolCount > 0) parts.push(`used ${toolCount} tool(s)`)
323
+
324
+ return `Score: ${score}. The agent ${parts.join(', ')}.`
325
+ })
326
+ ```
@@ -0,0 +1,113 @@
1
+ # Textual Difference Scorer
2
+
3
+ The `createTextualDifferenceScorer()` function uses sequence matching to measure the textual differences between two strings. It provides detailed information about changes, including the number of operations needed to transform one text into another.
4
+
5
+ ## Parameters
6
+
7
+ The `createTextualDifferenceScorer()` function does not take any options.
8
+
9
+ This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
10
+
11
+ ## .run() Returns
12
+
13
+ **runId:** (`string`): The id of the run (optional).
14
+
15
+ **analyzeStepResult:** (`object`): Object with difference metrics: { confidence: number, ratio: number, changes: number, lengthDiff: number }
16
+
17
+ **score:** (`number`): Similarity ratio (0-1) where 1 indicates identical texts.
18
+
19
+ `.run()` returns a result in the following shape:
20
+
21
+ ```typescript
22
+ {
23
+ runId: string,
24
+ analyzeStepResult: {
25
+ confidence: number,
26
+ ratio: number,
27
+ changes: number,
28
+ lengthDiff: number
29
+ },
30
+ score: number
31
+ }
32
+ ```
33
+
34
+ ## Scoring Details
35
+
36
+ The scorer calculates several measures:
37
+
38
+ - **Similarity Ratio**: Based on sequence matching between texts (0-1)
39
+ - **Changes**: Count of non-matching operations needed
40
+ - **Length Difference**: Normalized difference in text lengths
41
+ - **Confidence**: Inversely proportional to length difference
42
+
43
+ ### Scoring Process
44
+
45
+ 1. Analyzes textual differences:
46
+
47
+ - Performs sequence matching between input and output
48
+ - Counts the number of change operations required
49
+ - Measures length differences
50
+
51
+ 2. Calculates metrics:
52
+
53
+ - Computes similarity ratio
54
+ - Determines confidence score
55
+ - Combines into weighted score
56
+
57
+ Final score: `(similarity_ratio * confidence) * scale`
58
+
59
+ ### Score interpretation
60
+
61
+ A textual difference score between 0 and 1:
62
+
63
+ - **1.0**: Identical texts – no differences detected.
64
+ - **0.7–0.9**: Minor differences – few changes needed.
65
+ - **0.4–0.6**: Moderate differences – noticeable changes required.
66
+ - **0.1–0.3**: Major differences – extensive changes needed.
67
+ - **0.0**: Completely different texts.
68
+
69
+ ## Example
70
+
71
+ Measure textual differences between expected and actual agent outputs:
72
+
73
+ ```typescript
74
+ import { runEvals } from '@mastra/core/evals'
75
+ import { createTextualDifferenceScorer } from '@mastra/evals/scorers/prebuilt'
76
+ import { myAgent } from './agent'
77
+
78
+ const scorer = createTextualDifferenceScorer()
79
+
80
+ const result = await runEvals({
81
+ data: [
82
+ {
83
+ input: 'Summarize the concept of recursion',
84
+ groundTruth:
85
+ 'Recursion is when a function calls itself to solve a problem by breaking it into smaller subproblems.',
86
+ },
87
+ {
88
+ input: 'What is the capital of France?',
89
+ groundTruth: 'The capital of France is Paris.',
90
+ },
91
+ ],
92
+ scorers: [scorer],
93
+ target: myAgent,
94
+ onItemComplete: ({ scorerResults }) => {
95
+ console.log({
96
+ score: scorerResults[scorer.id].score,
97
+ groundTruth: scorerResults[scorer.id].groundTruth,
98
+ })
99
+ },
100
+ })
101
+
102
+ console.log(result.scores)
103
+ ```
104
+
105
+ For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
106
+
107
+ To add this scorer to an agent, see the [Scorers overview](https://mastra.ai/docs/evals/overview) guide.
108
+
109
+ ## Related
110
+
111
+ - [Content Similarity Scorer](https://mastra.ai/reference/evals/content-similarity)
112
+ - [Completeness Scorer](https://mastra.ai/reference/evals/completeness)
113
+ - [Keyword Coverage Scorer](https://mastra.ai/reference/evals/keyword-coverage)