@mastra/mcp-docs-server 1.1.5 → 1.1.6-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (556) hide show
  1. package/.docs/docs/agents/adding-voice.md +349 -0
  2. package/.docs/docs/agents/agent-approval.md +558 -0
  3. package/.docs/docs/agents/agent-memory.md +209 -0
  4. package/.docs/docs/agents/guardrails.md +374 -0
  5. package/.docs/docs/agents/network-approval.md +275 -0
  6. package/.docs/docs/agents/networks.md +299 -0
  7. package/.docs/docs/agents/overview.md +304 -0
  8. package/.docs/docs/agents/processors.md +622 -0
  9. package/.docs/docs/agents/structured-output.md +273 -0
  10. package/.docs/docs/agents/supervisor-agents.md +304 -0
  11. package/.docs/docs/agents/using-tools.md +214 -0
  12. package/.docs/docs/build-with-ai/mcp-docs-server.md +238 -0
  13. package/.docs/docs/build-with-ai/skills.md +35 -0
  14. package/.docs/docs/community/contributing-templates.md +3 -0
  15. package/.docs/docs/community/discord.md +9 -0
  16. package/.docs/docs/community/licensing.md +66 -0
  17. package/.docs/docs/deployment/cloud-providers.md +15 -0
  18. package/.docs/docs/deployment/mastra-server.md +122 -0
  19. package/.docs/docs/deployment/monorepo.md +142 -0
  20. package/.docs/docs/deployment/overview.md +62 -0
  21. package/.docs/docs/deployment/studio.md +239 -0
  22. package/.docs/docs/deployment/web-framework.md +52 -0
  23. package/.docs/docs/deployment/workflow-runners.md +9 -0
  24. package/.docs/docs/evals/built-in-scorers.md +47 -0
  25. package/.docs/docs/evals/custom-scorers.md +519 -0
  26. package/.docs/docs/evals/overview.md +141 -0
  27. package/.docs/docs/evals/running-in-ci.md +124 -0
  28. package/.docs/docs/getting-started/build-with-ai.md +68 -0
  29. package/.docs/docs/getting-started/manual-install.md +226 -0
  30. package/.docs/docs/getting-started/project-structure.md +60 -0
  31. package/.docs/docs/getting-started/start.md +28 -0
  32. package/.docs/docs/getting-started/studio.md +101 -0
  33. package/.docs/docs/index.md +43 -0
  34. package/.docs/docs/mastra-cloud/deployment.md +77 -0
  35. package/.docs/docs/mastra-cloud/observability.md +38 -0
  36. package/.docs/docs/mastra-cloud/overview.md +23 -0
  37. package/.docs/docs/mastra-cloud/setup.md +42 -0
  38. package/.docs/docs/mastra-cloud/studio.md +24 -0
  39. package/.docs/docs/mastra-code/configuration.md +299 -0
  40. package/.docs/docs/mastra-code/customization.md +228 -0
  41. package/.docs/docs/mastra-code/modes.md +104 -0
  42. package/.docs/docs/mastra-code/overview.md +135 -0
  43. package/.docs/docs/mastra-code/tools.md +229 -0
  44. package/.docs/docs/mcp/overview.md +373 -0
  45. package/.docs/docs/mcp/publishing-mcp-server.md +95 -0
  46. package/.docs/docs/memory/memory-processors.md +314 -0
  47. package/.docs/docs/memory/message-history.md +260 -0
  48. package/.docs/docs/memory/observational-memory.md +248 -0
  49. package/.docs/docs/memory/overview.md +45 -0
  50. package/.docs/docs/memory/semantic-recall.md +272 -0
  51. package/.docs/docs/memory/storage.md +261 -0
  52. package/.docs/docs/memory/working-memory.md +400 -0
  53. package/.docs/docs/observability/datasets/overview.md +198 -0
  54. package/.docs/docs/observability/datasets/running-experiments.md +274 -0
  55. package/.docs/docs/observability/logging.md +99 -0
  56. package/.docs/docs/observability/overview.md +70 -0
  57. package/.docs/docs/observability/tracing/bridges/otel.md +209 -0
  58. package/.docs/docs/observability/tracing/exporters/arize.md +272 -0
  59. package/.docs/docs/observability/tracing/exporters/braintrust.md +111 -0
  60. package/.docs/docs/observability/tracing/exporters/cloud.md +127 -0
  61. package/.docs/docs/observability/tracing/exporters/datadog.md +187 -0
  62. package/.docs/docs/observability/tracing/exporters/default.md +209 -0
  63. package/.docs/docs/observability/tracing/exporters/laminar.md +100 -0
  64. package/.docs/docs/observability/tracing/exporters/langfuse.md +213 -0
  65. package/.docs/docs/observability/tracing/exporters/langsmith.md +198 -0
  66. package/.docs/docs/observability/tracing/exporters/otel.md +476 -0
  67. package/.docs/docs/observability/tracing/exporters/posthog.md +148 -0
  68. package/.docs/docs/observability/tracing/exporters/sentry.md +208 -0
  69. package/.docs/docs/observability/tracing/overview.md +1112 -0
  70. package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +300 -0
  71. package/.docs/docs/rag/chunking-and-embedding.md +183 -0
  72. package/.docs/docs/rag/graph-rag.md +215 -0
  73. package/.docs/docs/rag/overview.md +72 -0
  74. package/.docs/docs/rag/retrieval.md +515 -0
  75. package/.docs/docs/rag/vector-databases.md +645 -0
  76. package/.docs/docs/server/auth/auth0.md +220 -0
  77. package/.docs/docs/server/auth/better-auth.md +203 -0
  78. package/.docs/docs/server/auth/clerk.md +132 -0
  79. package/.docs/docs/server/auth/composite-auth.md +234 -0
  80. package/.docs/docs/server/auth/custom-auth-provider.md +513 -0
  81. package/.docs/docs/server/auth/firebase.md +272 -0
  82. package/.docs/docs/server/auth/jwt.md +110 -0
  83. package/.docs/docs/server/auth/simple-auth.md +180 -0
  84. package/.docs/docs/server/auth/supabase.md +117 -0
  85. package/.docs/docs/server/auth/workos.md +186 -0
  86. package/.docs/docs/server/auth.md +38 -0
  87. package/.docs/docs/server/custom-adapters.md +378 -0
  88. package/.docs/docs/server/custom-api-routes.md +267 -0
  89. package/.docs/docs/server/mastra-client.md +243 -0
  90. package/.docs/docs/server/mastra-server.md +71 -0
  91. package/.docs/docs/server/middleware.md +225 -0
  92. package/.docs/docs/server/request-context.md +471 -0
  93. package/.docs/docs/server/server-adapters.md +547 -0
  94. package/.docs/docs/streaming/events.md +237 -0
  95. package/.docs/docs/streaming/overview.md +175 -0
  96. package/.docs/docs/streaming/tool-streaming.md +175 -0
  97. package/.docs/docs/streaming/workflow-streaming.md +109 -0
  98. package/.docs/docs/voice/overview.md +959 -0
  99. package/.docs/docs/voice/speech-to-speech.md +102 -0
  100. package/.docs/docs/voice/speech-to-text.md +79 -0
  101. package/.docs/docs/voice/text-to-speech.md +83 -0
  102. package/.docs/docs/workflows/agents-and-tools.md +166 -0
  103. package/.docs/docs/workflows/control-flow.md +822 -0
  104. package/.docs/docs/workflows/error-handling.md +360 -0
  105. package/.docs/docs/workflows/human-in-the-loop.md +215 -0
  106. package/.docs/docs/workflows/overview.md +370 -0
  107. package/.docs/docs/workflows/snapshots.md +238 -0
  108. package/.docs/docs/workflows/suspend-and-resume.md +205 -0
  109. package/.docs/docs/workflows/time-travel.md +309 -0
  110. package/.docs/docs/workflows/workflow-state.md +181 -0
  111. package/.docs/docs/workspace/filesystem.md +164 -0
  112. package/.docs/docs/workspace/overview.md +239 -0
  113. package/.docs/docs/workspace/sandbox.md +63 -0
  114. package/.docs/docs/workspace/search.md +243 -0
  115. package/.docs/docs/workspace/skills.md +169 -0
  116. package/.docs/guides/agent-frameworks/ai-sdk.md +140 -0
  117. package/.docs/guides/build-your-ui/ai-sdk-ui.md +1499 -0
  118. package/.docs/guides/build-your-ui/assistant-ui.md +156 -0
  119. package/.docs/guides/build-your-ui/copilotkit.md +289 -0
  120. package/.docs/guides/deployment/amazon-ec2.md +130 -0
  121. package/.docs/guides/deployment/aws-lambda.md +248 -0
  122. package/.docs/guides/deployment/azure-app-services.md +114 -0
  123. package/.docs/guides/deployment/cloudflare.md +99 -0
  124. package/.docs/guides/deployment/digital-ocean.md +168 -0
  125. package/.docs/guides/deployment/inngest.md +682 -0
  126. package/.docs/guides/deployment/netlify.md +77 -0
  127. package/.docs/guides/deployment/vercel.md +101 -0
  128. package/.docs/guides/getting-started/astro.md +398 -0
  129. package/.docs/guides/getting-started/electron.md +504 -0
  130. package/.docs/guides/getting-started/express.md +251 -0
  131. package/.docs/guides/getting-started/hono.md +190 -0
  132. package/.docs/guides/getting-started/next-js.md +347 -0
  133. package/.docs/guides/getting-started/nuxt.md +497 -0
  134. package/.docs/guides/getting-started/quickstart.md +67 -0
  135. package/.docs/guides/getting-started/sveltekit.md +296 -0
  136. package/.docs/guides/getting-started/vite-react.md +425 -0
  137. package/.docs/guides/guide/ai-recruiter.md +226 -0
  138. package/.docs/guides/guide/chef-michel.md +211 -0
  139. package/.docs/guides/guide/code-review-bot.md +226 -0
  140. package/.docs/guides/guide/dev-assistant.md +307 -0
  141. package/.docs/guides/guide/docs-manager.md +238 -0
  142. package/.docs/guides/guide/github-actions-pr-description.md +236 -0
  143. package/.docs/guides/guide/notes-mcp-server.md +416 -0
  144. package/.docs/guides/guide/research-assistant.md +348 -0
  145. package/.docs/guides/guide/research-coordinator.md +416 -0
  146. package/.docs/guides/guide/stock-agent.md +132 -0
  147. package/.docs/guides/guide/web-search.md +320 -0
  148. package/.docs/guides/guide/whatsapp-chat-bot.md +405 -0
  149. package/.docs/guides/index.md +3 -0
  150. package/.docs/guides/migrations/agentnetwork.md +97 -0
  151. package/.docs/guides/migrations/ai-sdk-v4-to-v5.md +112 -0
  152. package/.docs/guides/migrations/network-to-supervisor.md +261 -0
  153. package/.docs/guides/migrations/upgrade-to-v1/agent.md +404 -0
  154. package/.docs/guides/migrations/upgrade-to-v1/cli.md +57 -0
  155. package/.docs/guides/migrations/upgrade-to-v1/client.md +337 -0
  156. package/.docs/guides/migrations/upgrade-to-v1/deployment.md +37 -0
  157. package/.docs/guides/migrations/upgrade-to-v1/evals.md +239 -0
  158. package/.docs/guides/migrations/upgrade-to-v1/mastra.md +143 -0
  159. package/.docs/guides/migrations/upgrade-to-v1/mcp.md +97 -0
  160. package/.docs/guides/migrations/upgrade-to-v1/memory.md +285 -0
  161. package/.docs/guides/migrations/upgrade-to-v1/overview.md +119 -0
  162. package/.docs/guides/migrations/upgrade-to-v1/processors.md +68 -0
  163. package/.docs/guides/migrations/upgrade-to-v1/rag.md +42 -0
  164. package/.docs/guides/migrations/upgrade-to-v1/storage.md +553 -0
  165. package/.docs/guides/migrations/upgrade-to-v1/tools.md +180 -0
  166. package/.docs/guides/migrations/upgrade-to-v1/tracing.md +412 -0
  167. package/.docs/guides/migrations/upgrade-to-v1/vectors.md +87 -0
  168. package/.docs/guides/migrations/upgrade-to-v1/voice.md +30 -0
  169. package/.docs/guides/migrations/upgrade-to-v1/workflows.md +341 -0
  170. package/.docs/guides/migrations/vnext-to-standard-apis.md +362 -0
  171. package/.docs/models/embeddings.md +161 -0
  172. package/.docs/models/gateways/azure-openai.md +128 -0
  173. package/.docs/models/gateways/custom-gateways.md +545 -0
  174. package/.docs/models/gateways/netlify.md +88 -0
  175. package/.docs/models/gateways/openrouter.md +219 -0
  176. package/.docs/models/gateways/vercel.md +225 -0
  177. package/.docs/models/gateways.md +14 -0
  178. package/.docs/models/index.md +286 -0
  179. package/.docs/models/providers/302ai.md +134 -0
  180. package/.docs/models/providers/abacus.md +125 -0
  181. package/.docs/models/providers/agentrouter.md +90 -0
  182. package/.docs/models/providers/aihubmix.md +107 -0
  183. package/.docs/models/providers/alibaba-cn.md +135 -0
  184. package/.docs/models/providers/alibaba.md +111 -0
  185. package/.docs/models/providers/amazon-bedrock.md +33 -0
  186. package/.docs/models/providers/anthropic.md +153 -0
  187. package/.docs/models/providers/azure.md +33 -0
  188. package/.docs/models/providers/bailing.md +72 -0
  189. package/.docs/models/providers/baseten.md +77 -0
  190. package/.docs/models/providers/berget.md +78 -0
  191. package/.docs/models/providers/cerebras.md +101 -0
  192. package/.docs/models/providers/chutes.md +136 -0
  193. package/.docs/models/providers/cloudflare-ai-gateway.md +33 -0
  194. package/.docs/models/providers/cloudflare-workers-ai.md +109 -0
  195. package/.docs/models/providers/cohere.md +33 -0
  196. package/.docs/models/providers/cortecs.md +91 -0
  197. package/.docs/models/providers/deepinfra.md +112 -0
  198. package/.docs/models/providers/deepseek.md +88 -0
  199. package/.docs/models/providers/fastrouter.md +84 -0
  200. package/.docs/models/providers/fireworks-ai.md +89 -0
  201. package/.docs/models/providers/firmware.md +85 -0
  202. package/.docs/models/providers/friendli.md +78 -0
  203. package/.docs/models/providers/github-models.md +125 -0
  204. package/.docs/models/providers/google-vertex.md +33 -0
  205. package/.docs/models/providers/google.md +159 -0
  206. package/.docs/models/providers/groq.md +107 -0
  207. package/.docs/models/providers/helicone.md +161 -0
  208. package/.docs/models/providers/huggingface.md +90 -0
  209. package/.docs/models/providers/iflowcn.md +84 -0
  210. package/.docs/models/providers/inception.md +72 -0
  211. package/.docs/models/providers/inference.md +79 -0
  212. package/.docs/models/providers/io-intelligence.md +87 -0
  213. package/.docs/models/providers/io-net.md +87 -0
  214. package/.docs/models/providers/jiekou.md +131 -0
  215. package/.docs/models/providers/kilo.md +333 -0
  216. package/.docs/models/providers/kimi-for-coding.md +100 -0
  217. package/.docs/models/providers/kuae-cloud-coding-plan.md +71 -0
  218. package/.docs/models/providers/llama.md +77 -0
  219. package/.docs/models/providers/lmstudio.md +73 -0
  220. package/.docs/models/providers/lucidquery.md +72 -0
  221. package/.docs/models/providers/minimax-cn-coding-plan.md +102 -0
  222. package/.docs/models/providers/minimax-cn.md +102 -0
  223. package/.docs/models/providers/minimax-coding-plan.md +102 -0
  224. package/.docs/models/providers/minimax.md +104 -0
  225. package/.docs/models/providers/mistral.md +124 -0
  226. package/.docs/models/providers/moark.md +72 -0
  227. package/.docs/models/providers/modelscope.md +77 -0
  228. package/.docs/models/providers/moonshotai-cn.md +76 -0
  229. package/.docs/models/providers/moonshotai.md +76 -0
  230. package/.docs/models/providers/morph.md +73 -0
  231. package/.docs/models/providers/nano-gpt.md +103 -0
  232. package/.docs/models/providers/nebius.md +116 -0
  233. package/.docs/models/providers/nova.md +72 -0
  234. package/.docs/models/providers/novita-ai.md +154 -0
  235. package/.docs/models/providers/nvidia.md +141 -0
  236. package/.docs/models/providers/ollama-cloud.md +103 -0
  237. package/.docs/models/providers/ollama.md +33 -0
  238. package/.docs/models/providers/openai.md +193 -0
  239. package/.docs/models/providers/opencode.md +100 -0
  240. package/.docs/models/providers/ovhcloud.md +83 -0
  241. package/.docs/models/providers/perplexity.md +100 -0
  242. package/.docs/models/providers/poe.md +183 -0
  243. package/.docs/models/providers/privatemode-ai.md +75 -0
  244. package/.docs/models/providers/requesty.md +90 -0
  245. package/.docs/models/providers/scaleway.md +84 -0
  246. package/.docs/models/providers/siliconflow-cn.md +138 -0
  247. package/.docs/models/providers/siliconflow.md +140 -0
  248. package/.docs/models/providers/stackit.md +78 -0
  249. package/.docs/models/providers/stepfun.md +73 -0
  250. package/.docs/models/providers/submodel.md +79 -0
  251. package/.docs/models/providers/synthetic.md +96 -0
  252. package/.docs/models/providers/togetherai.md +115 -0
  253. package/.docs/models/providers/upstage.md +73 -0
  254. package/.docs/models/providers/venice.md +95 -0
  255. package/.docs/models/providers/vivgrid.md +106 -0
  256. package/.docs/models/providers/vultr.md +75 -0
  257. package/.docs/models/providers/wandb.md +80 -0
  258. package/.docs/models/providers/xai.md +141 -0
  259. package/.docs/models/providers/xiaomi.md +71 -0
  260. package/.docs/models/providers/zai-coding-plan.md +80 -0
  261. package/.docs/models/providers/zai.md +79 -0
  262. package/.docs/models/providers/zenmux.md +161 -0
  263. package/.docs/models/providers/zhipuai-coding-plan.md +79 -0
  264. package/.docs/models/providers/zhipuai.md +79 -0
  265. package/.docs/models/providers.md +81 -0
  266. package/.docs/reference/agents/agent.md +141 -0
  267. package/.docs/reference/agents/generate.md +186 -0
  268. package/.docs/reference/agents/generateLegacy.md +173 -0
  269. package/.docs/reference/agents/getDefaultGenerateOptions.md +36 -0
  270. package/.docs/reference/agents/getDefaultOptions.md +34 -0
  271. package/.docs/reference/agents/getDefaultStreamOptions.md +36 -0
  272. package/.docs/reference/agents/getDescription.md +21 -0
  273. package/.docs/reference/agents/getInstructions.md +34 -0
  274. package/.docs/reference/agents/getLLM.md +37 -0
  275. package/.docs/reference/agents/getMemory.md +34 -0
  276. package/.docs/reference/agents/getModel.md +34 -0
  277. package/.docs/reference/agents/getTools.md +29 -0
  278. package/.docs/reference/agents/getVoice.md +34 -0
  279. package/.docs/reference/agents/listAgents.md +35 -0
  280. package/.docs/reference/agents/listScorers.md +34 -0
  281. package/.docs/reference/agents/listTools.md +34 -0
  282. package/.docs/reference/agents/listWorkflows.md +34 -0
  283. package/.docs/reference/agents/network.md +133 -0
  284. package/.docs/reference/ai-sdk/chat-route.md +82 -0
  285. package/.docs/reference/ai-sdk/handle-chat-stream.md +53 -0
  286. package/.docs/reference/ai-sdk/handle-network-stream.md +37 -0
  287. package/.docs/reference/ai-sdk/handle-workflow-stream.md +55 -0
  288. package/.docs/reference/ai-sdk/network-route.md +74 -0
  289. package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +231 -0
  290. package/.docs/reference/ai-sdk/to-ai-sdk-v4-messages.md +79 -0
  291. package/.docs/reference/ai-sdk/to-ai-sdk-v5-messages.md +76 -0
  292. package/.docs/reference/ai-sdk/with-mastra.md +59 -0
  293. package/.docs/reference/ai-sdk/workflow-route.md +79 -0
  294. package/.docs/reference/auth/auth0.md +73 -0
  295. package/.docs/reference/auth/better-auth.md +71 -0
  296. package/.docs/reference/auth/clerk.md +36 -0
  297. package/.docs/reference/auth/firebase.md +80 -0
  298. package/.docs/reference/auth/jwt.md +26 -0
  299. package/.docs/reference/auth/supabase.md +33 -0
  300. package/.docs/reference/auth/workos.md +84 -0
  301. package/.docs/reference/cli/create-mastra.md +137 -0
  302. package/.docs/reference/cli/mastra.md +336 -0
  303. package/.docs/reference/client-js/agents.md +437 -0
  304. package/.docs/reference/client-js/error-handling.md +16 -0
  305. package/.docs/reference/client-js/logs.md +24 -0
  306. package/.docs/reference/client-js/mastra-client.md +63 -0
  307. package/.docs/reference/client-js/memory.md +221 -0
  308. package/.docs/reference/client-js/observability.md +72 -0
  309. package/.docs/reference/client-js/telemetry.md +20 -0
  310. package/.docs/reference/client-js/tools.md +44 -0
  311. package/.docs/reference/client-js/vectors.md +79 -0
  312. package/.docs/reference/client-js/workflows.md +199 -0
  313. package/.docs/reference/configuration.md +752 -0
  314. package/.docs/reference/core/addGateway.md +42 -0
  315. package/.docs/reference/core/getAgent.md +21 -0
  316. package/.docs/reference/core/getAgentById.md +21 -0
  317. package/.docs/reference/core/getDeployer.md +22 -0
  318. package/.docs/reference/core/getGateway.md +38 -0
  319. package/.docs/reference/core/getGatewayById.md +41 -0
  320. package/.docs/reference/core/getLogger.md +22 -0
  321. package/.docs/reference/core/getMCPServer.md +47 -0
  322. package/.docs/reference/core/getMCPServerById.md +55 -0
  323. package/.docs/reference/core/getMemory.md +50 -0
  324. package/.docs/reference/core/getScorer.md +54 -0
  325. package/.docs/reference/core/getScorerById.md +54 -0
  326. package/.docs/reference/core/getServer.md +22 -0
  327. package/.docs/reference/core/getStorage.md +22 -0
  328. package/.docs/reference/core/getStoredAgentById.md +89 -0
  329. package/.docs/reference/core/getTelemetry.md +22 -0
  330. package/.docs/reference/core/getVector.md +22 -0
  331. package/.docs/reference/core/getWorkflow.md +42 -0
  332. package/.docs/reference/core/listAgents.md +21 -0
  333. package/.docs/reference/core/listGateways.md +40 -0
  334. package/.docs/reference/core/listLogs.md +38 -0
  335. package/.docs/reference/core/listLogsByRunId.md +36 -0
  336. package/.docs/reference/core/listMCPServers.md +55 -0
  337. package/.docs/reference/core/listMemory.md +56 -0
  338. package/.docs/reference/core/listScorers.md +29 -0
  339. package/.docs/reference/core/listStoredAgents.md +93 -0
  340. package/.docs/reference/core/listVectors.md +22 -0
  341. package/.docs/reference/core/listWorkflows.md +21 -0
  342. package/.docs/reference/core/mastra-class.md +66 -0
  343. package/.docs/reference/core/mastra-model-gateway.md +153 -0
  344. package/.docs/reference/core/setLogger.md +26 -0
  345. package/.docs/reference/core/setStorage.md +27 -0
  346. package/.docs/reference/datasets/addItem.md +37 -0
  347. package/.docs/reference/datasets/addItems.md +35 -0
  348. package/.docs/reference/datasets/compareExperiments.md +52 -0
  349. package/.docs/reference/datasets/create.md +51 -0
  350. package/.docs/reference/datasets/dataset.md +82 -0
  351. package/.docs/reference/datasets/datasets-manager.md +94 -0
  352. package/.docs/reference/datasets/delete.md +25 -0
  353. package/.docs/reference/datasets/deleteExperiment.md +27 -0
  354. package/.docs/reference/datasets/deleteItem.md +27 -0
  355. package/.docs/reference/datasets/deleteItems.md +29 -0
  356. package/.docs/reference/datasets/get.md +31 -0
  357. package/.docs/reference/datasets/getDetails.md +47 -0
  358. package/.docs/reference/datasets/getExperiment.md +30 -0
  359. package/.docs/reference/datasets/getItem.md +33 -0
  360. package/.docs/reference/datasets/getItemHistory.md +31 -0
  361. package/.docs/reference/datasets/list.md +31 -0
  362. package/.docs/reference/datasets/listExperimentResults.md +39 -0
  363. package/.docs/reference/datasets/listExperiments.md +33 -0
  364. package/.docs/reference/datasets/listItems.md +46 -0
  365. package/.docs/reference/datasets/listVersions.md +33 -0
  366. package/.docs/reference/datasets/startExperiment.md +62 -0
  367. package/.docs/reference/datasets/startExperimentAsync.md +43 -0
  368. package/.docs/reference/datasets/update.md +48 -0
  369. package/.docs/reference/datasets/updateItem.md +38 -0
  370. package/.docs/reference/deployer/cloudflare.md +79 -0
  371. package/.docs/reference/deployer/netlify.md +80 -0
  372. package/.docs/reference/deployer/vercel.md +91 -0
  373. package/.docs/reference/deployer.md +100 -0
  374. package/.docs/reference/evals/answer-relevancy.md +105 -0
  375. package/.docs/reference/evals/answer-similarity.md +99 -0
  376. package/.docs/reference/evals/bias.md +120 -0
  377. package/.docs/reference/evals/completeness.md +136 -0
  378. package/.docs/reference/evals/content-similarity.md +101 -0
  379. package/.docs/reference/evals/context-precision.md +196 -0
  380. package/.docs/reference/evals/context-relevance.md +531 -0
  381. package/.docs/reference/evals/create-scorer.md +270 -0
  382. package/.docs/reference/evals/faithfulness.md +114 -0
  383. package/.docs/reference/evals/hallucination.md +213 -0
  384. package/.docs/reference/evals/keyword-coverage.md +128 -0
  385. package/.docs/reference/evals/mastra-scorer.md +123 -0
  386. package/.docs/reference/evals/noise-sensitivity.md +675 -0
  387. package/.docs/reference/evals/prompt-alignment.md +614 -0
  388. package/.docs/reference/evals/run-evals.md +179 -0
  389. package/.docs/reference/evals/scorer-utils.md +326 -0
  390. package/.docs/reference/evals/textual-difference.md +113 -0
  391. package/.docs/reference/evals/tone-consistency.md +119 -0
  392. package/.docs/reference/evals/tool-call-accuracy.md +533 -0
  393. package/.docs/reference/evals/toxicity.md +123 -0
  394. package/.docs/reference/harness/harness-class.md +708 -0
  395. package/.docs/reference/index.md +277 -0
  396. package/.docs/reference/logging/pino-logger.md +117 -0
  397. package/.docs/reference/mastra-code/createMastraCode.md +108 -0
  398. package/.docs/reference/memory/clone-utilities.md +199 -0
  399. package/.docs/reference/memory/cloneThread.md +130 -0
  400. package/.docs/reference/memory/createThread.md +68 -0
  401. package/.docs/reference/memory/deleteMessages.md +38 -0
  402. package/.docs/reference/memory/getThreadById.md +24 -0
  403. package/.docs/reference/memory/listThreads.md +145 -0
  404. package/.docs/reference/memory/memory-class.md +147 -0
  405. package/.docs/reference/memory/observational-memory.md +565 -0
  406. package/.docs/reference/memory/recall.md +91 -0
  407. package/.docs/reference/observability/tracing/bridges/otel.md +131 -0
  408. package/.docs/reference/observability/tracing/configuration.md +178 -0
  409. package/.docs/reference/observability/tracing/exporters/arize.md +141 -0
  410. package/.docs/reference/observability/tracing/exporters/braintrust.md +93 -0
  411. package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +163 -0
  412. package/.docs/reference/observability/tracing/exporters/console-exporter.md +138 -0
  413. package/.docs/reference/observability/tracing/exporters/datadog.md +116 -0
  414. package/.docs/reference/observability/tracing/exporters/default-exporter.md +174 -0
  415. package/.docs/reference/observability/tracing/exporters/laminar.md +78 -0
  416. package/.docs/reference/observability/tracing/exporters/langfuse.md +134 -0
  417. package/.docs/reference/observability/tracing/exporters/langsmith.md +108 -0
  418. package/.docs/reference/observability/tracing/exporters/otel.md +199 -0
  419. package/.docs/reference/observability/tracing/exporters/posthog.md +92 -0
  420. package/.docs/reference/observability/tracing/exporters/sentry.md +184 -0
  421. package/.docs/reference/observability/tracing/instances.md +107 -0
  422. package/.docs/reference/observability/tracing/interfaces.md +743 -0
  423. package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +144 -0
  424. package/.docs/reference/observability/tracing/spans.md +224 -0
  425. package/.docs/reference/processors/batch-parts-processor.md +61 -0
  426. package/.docs/reference/processors/language-detector.md +82 -0
  427. package/.docs/reference/processors/message-history-processor.md +85 -0
  428. package/.docs/reference/processors/moderation-processor.md +104 -0
  429. package/.docs/reference/processors/pii-detector.md +108 -0
  430. package/.docs/reference/processors/processor-interface.md +521 -0
  431. package/.docs/reference/processors/prompt-injection-detector.md +72 -0
  432. package/.docs/reference/processors/semantic-recall-processor.md +117 -0
  433. package/.docs/reference/processors/system-prompt-scrubber.md +80 -0
  434. package/.docs/reference/processors/token-limiter-processor.md +115 -0
  435. package/.docs/reference/processors/tool-call-filter.md +85 -0
  436. package/.docs/reference/processors/tool-search-processor.md +111 -0
  437. package/.docs/reference/processors/unicode-normalizer.md +62 -0
  438. package/.docs/reference/processors/working-memory-processor.md +152 -0
  439. package/.docs/reference/rag/chunk.md +221 -0
  440. package/.docs/reference/rag/database-config.md +261 -0
  441. package/.docs/reference/rag/document.md +114 -0
  442. package/.docs/reference/rag/embeddings.md +92 -0
  443. package/.docs/reference/rag/extract-params.md +168 -0
  444. package/.docs/reference/rag/graph-rag.md +111 -0
  445. package/.docs/reference/rag/metadata-filters.md +216 -0
  446. package/.docs/reference/rag/rerank.md +75 -0
  447. package/.docs/reference/rag/rerankWithScorer.md +80 -0
  448. package/.docs/reference/server/create-route.md +262 -0
  449. package/.docs/reference/server/express-adapter.md +176 -0
  450. package/.docs/reference/server/fastify-adapter.md +90 -0
  451. package/.docs/reference/server/hono-adapter.md +162 -0
  452. package/.docs/reference/server/koa-adapter.md +127 -0
  453. package/.docs/reference/server/mastra-server.md +298 -0
  454. package/.docs/reference/server/register-api-route.md +249 -0
  455. package/.docs/reference/server/routes.md +306 -0
  456. package/.docs/reference/storage/cloudflare-d1.md +218 -0
  457. package/.docs/reference/storage/cloudflare.md +88 -0
  458. package/.docs/reference/storage/composite.md +235 -0
  459. package/.docs/reference/storage/convex.md +161 -0
  460. package/.docs/reference/storage/dynamodb.md +282 -0
  461. package/.docs/reference/storage/lance.md +131 -0
  462. package/.docs/reference/storage/libsql.md +135 -0
  463. package/.docs/reference/storage/mongodb.md +262 -0
  464. package/.docs/reference/storage/mssql.md +157 -0
  465. package/.docs/reference/storage/overview.md +121 -0
  466. package/.docs/reference/storage/postgresql.md +526 -0
  467. package/.docs/reference/storage/upstash.md +160 -0
  468. package/.docs/reference/streaming/ChunkType.md +292 -0
  469. package/.docs/reference/streaming/agents/MastraModelOutput.md +182 -0
  470. package/.docs/reference/streaming/agents/stream.md +221 -0
  471. package/.docs/reference/streaming/agents/streamLegacy.md +142 -0
  472. package/.docs/reference/streaming/workflows/observeStream.md +42 -0
  473. package/.docs/reference/streaming/workflows/resumeStream.md +61 -0
  474. package/.docs/reference/streaming/workflows/stream.md +88 -0
  475. package/.docs/reference/streaming/workflows/timeTravelStream.md +142 -0
  476. package/.docs/reference/templates/overview.md +194 -0
  477. package/.docs/reference/tools/create-tool.md +237 -0
  478. package/.docs/reference/tools/document-chunker-tool.md +89 -0
  479. package/.docs/reference/tools/graph-rag-tool.md +182 -0
  480. package/.docs/reference/tools/mcp-client.md +954 -0
  481. package/.docs/reference/tools/mcp-server.md +1271 -0
  482. package/.docs/reference/tools/vector-query-tool.md +459 -0
  483. package/.docs/reference/vectors/astra.md +121 -0
  484. package/.docs/reference/vectors/chroma.md +264 -0
  485. package/.docs/reference/vectors/convex.md +300 -0
  486. package/.docs/reference/vectors/couchbase.md +226 -0
  487. package/.docs/reference/vectors/duckdb.md +318 -0
  488. package/.docs/reference/vectors/elasticsearch.md +189 -0
  489. package/.docs/reference/vectors/lance.md +220 -0
  490. package/.docs/reference/vectors/libsql.md +305 -0
  491. package/.docs/reference/vectors/mongodb.md +295 -0
  492. package/.docs/reference/vectors/opensearch.md +99 -0
  493. package/.docs/reference/vectors/pg.md +408 -0
  494. package/.docs/reference/vectors/pinecone.md +168 -0
  495. package/.docs/reference/vectors/qdrant.md +222 -0
  496. package/.docs/reference/vectors/s3vectors.md +277 -0
  497. package/.docs/reference/vectors/turbopuffer.md +157 -0
  498. package/.docs/reference/vectors/upstash.md +294 -0
  499. package/.docs/reference/vectors/vectorize.md +147 -0
  500. package/.docs/reference/voice/azure.md +148 -0
  501. package/.docs/reference/voice/cloudflare.md +83 -0
  502. package/.docs/reference/voice/composite-voice.md +121 -0
  503. package/.docs/reference/voice/deepgram.md +79 -0
  504. package/.docs/reference/voice/elevenlabs.md +98 -0
  505. package/.docs/reference/voice/google-gemini-live.md +378 -0
  506. package/.docs/reference/voice/google.md +228 -0
  507. package/.docs/reference/voice/mastra-voice.md +311 -0
  508. package/.docs/reference/voice/murf.md +122 -0
  509. package/.docs/reference/voice/openai-realtime.md +203 -0
  510. package/.docs/reference/voice/openai.md +88 -0
  511. package/.docs/reference/voice/playai.md +80 -0
  512. package/.docs/reference/voice/sarvam.md +126 -0
  513. package/.docs/reference/voice/speechify.md +75 -0
  514. package/.docs/reference/voice/voice.addInstructions.md +55 -0
  515. package/.docs/reference/voice/voice.addTools.md +67 -0
  516. package/.docs/reference/voice/voice.answer.md +54 -0
  517. package/.docs/reference/voice/voice.close.md +51 -0
  518. package/.docs/reference/voice/voice.connect.md +94 -0
  519. package/.docs/reference/voice/voice.events.md +37 -0
  520. package/.docs/reference/voice/voice.getSpeakers.md +129 -0
  521. package/.docs/reference/voice/voice.listen.md +164 -0
  522. package/.docs/reference/voice/voice.off.md +54 -0
  523. package/.docs/reference/voice/voice.on.md +111 -0
  524. package/.docs/reference/voice/voice.send.md +65 -0
  525. package/.docs/reference/voice/voice.speak.md +157 -0
  526. package/.docs/reference/voice/voice.updateConfig.md +60 -0
  527. package/.docs/reference/workflows/run-methods/cancel.md +86 -0
  528. package/.docs/reference/workflows/run-methods/restart.md +33 -0
  529. package/.docs/reference/workflows/run-methods/resume.md +59 -0
  530. package/.docs/reference/workflows/run-methods/start.md +58 -0
  531. package/.docs/reference/workflows/run-methods/startAsync.md +67 -0
  532. package/.docs/reference/workflows/run-methods/timeTravel.md +142 -0
  533. package/.docs/reference/workflows/run.md +59 -0
  534. package/.docs/reference/workflows/step.md +119 -0
  535. package/.docs/reference/workflows/workflow-methods/branch.md +25 -0
  536. package/.docs/reference/workflows/workflow-methods/commit.md +17 -0
  537. package/.docs/reference/workflows/workflow-methods/create-run.md +63 -0
  538. package/.docs/reference/workflows/workflow-methods/dountil.md +25 -0
  539. package/.docs/reference/workflows/workflow-methods/dowhile.md +25 -0
  540. package/.docs/reference/workflows/workflow-methods/foreach.md +118 -0
  541. package/.docs/reference/workflows/workflow-methods/map.md +93 -0
  542. package/.docs/reference/workflows/workflow-methods/parallel.md +21 -0
  543. package/.docs/reference/workflows/workflow-methods/sleep.md +35 -0
  544. package/.docs/reference/workflows/workflow-methods/sleepUntil.md +35 -0
  545. package/.docs/reference/workflows/workflow-methods/then.md +21 -0
  546. package/.docs/reference/workflows/workflow.md +157 -0
  547. package/.docs/reference/workspace/e2b-sandbox.md +289 -0
  548. package/.docs/reference/workspace/filesystem.md +255 -0
  549. package/.docs/reference/workspace/gcs-filesystem.md +174 -0
  550. package/.docs/reference/workspace/local-filesystem.md +343 -0
  551. package/.docs/reference/workspace/local-sandbox.md +301 -0
  552. package/.docs/reference/workspace/s3-filesystem.md +175 -0
  553. package/.docs/reference/workspace/sandbox.md +87 -0
  554. package/.docs/reference/workspace/workspace-class.md +244 -0
  555. package/CHANGELOG.md +8 -0
  556. package/package.json +5 -5
@@ -0,0 +1,119 @@
1
+ # Tone Consistency Scorer
2
+
3
+ The `createToneScorer()` function evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
4
+
5
+ ## Parameters
6
+
7
+ The `createToneScorer()` function does not take any options.
8
+
9
+ This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
10
+
11
+ ## .run() Returns
12
+
13
+ **runId:** (`string`): The id of the run (optional).
14
+
15
+ **analyzeStepResult:** (`object`): Object with tone metrics: { responseSentiment: number, referenceSentiment: number, difference: number } (for comparison mode) OR { avgSentiment: number, sentimentVariance: number } (for stability mode)
16
+
17
+ **score:** (`number`): Tone consistency/stability score (0-1).
18
+
19
+ `.run()` returns a result in the following shape:
20
+
21
+ ```typescript
22
+ {
23
+ runId: string,
24
+ analyzeStepResult: {
25
+ responseSentiment?: number,
26
+ referenceSentiment?: number,
27
+ difference?: number,
28
+ avgSentiment?: number,
29
+ sentimentVariance?: number,
30
+ },
31
+ score: number
32
+ }
33
+ ```
34
+
35
+ ## Scoring Details
36
+
37
+ The scorer evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
38
+
39
+ ### Scoring Process
40
+
41
+ 1. Analyzes tone patterns:
42
+
43
+ - Extracts sentiment features
44
+ - Computes sentiment scores
45
+ - Measures tone variations
46
+
47
+ 2. Calculates mode-specific score:
48
+
49
+ - **Tone Consistency** (input and output): Compares sentiment between texts
50
+ - Calculates sentiment difference
51
+ - Score = 1 - (sentiment\_difference / max\_difference)
52
+ - **Tone Stability** (single input): Analyzes sentiment across sentences
53
+ - Calculates sentiment variance
54
+ - Score = 1 - (sentiment\_variance / max\_variance)
55
+
56
+ Final score: `mode_specific_score * scale`
57
+
58
+ ### Score interpretation
59
+
60
+ Scores range from 0 to scale (default 0-1):
61
+
62
+ - 1.0: Perfect tone consistency/stability
63
+ - 0.7-0.9: Strong consistency with minor variations
64
+ - 0.4-0.6: Moderate consistency with noticeable shifts
65
+ - 0.1-0.3: Poor consistency with major tone changes
66
+ - 0.0: No consistency - completely different tones
67
+
68
+ ### analyzeStepResult
69
+
70
+ Object with tone metrics:
71
+
72
+ - **responseSentiment**: Sentiment score for the response (comparison mode).
73
+ - **referenceSentiment**: Sentiment score for the input/reference (comparison mode).
74
+ - **difference**: Absolute difference between sentiment scores (comparison mode).
75
+ - **avgSentiment**: Average sentiment across sentences (stability mode).
76
+ - **sentimentVariance**: Variance of sentiment across sentences (stability mode).
77
+
78
+ ## Example
79
+
80
+ Evaluate tone consistency between related agent responses:
81
+
82
+ ```typescript
83
+ import { runEvals } from '@mastra/core/evals'
84
+ import { createToneScorer } from '@mastra/evals/scorers/prebuilt'
85
+ import { myAgent } from './agent'
86
+
87
+ const scorer = createToneScorer()
88
+
89
+ const result = await runEvals({
90
+ data: [
91
+ {
92
+ input: 'How was your experience with our service?',
93
+ groundTruth: 'The service was excellent and exceeded expectations!',
94
+ },
95
+ {
96
+ input: 'Tell me about the customer support',
97
+ groundTruth: 'The support team was friendly and very helpful.',
98
+ },
99
+ ],
100
+ scorers: [scorer],
101
+ target: myAgent,
102
+ onItemComplete: ({ scorerResults }) => {
103
+ console.log({
104
+ score: scorerResults[scorer.id].score,
105
+ })
106
+ },
107
+ })
108
+
109
+ console.log(result.scores)
110
+ ```
111
+
112
+ For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
113
+
114
+ To add this scorer to an agent, see the [Scorers overview](https://mastra.ai/docs/evals/overview) guide.
115
+
116
+ ## Related
117
+
118
+ - [Content Similarity Scorer](https://mastra.ai/reference/evals/content-similarity)
119
+ - [Toxicity Scorer](https://mastra.ai/reference/evals/toxicity)
@@ -0,0 +1,533 @@
1
+ # Tool Call Accuracy Scorers
2
+
3
+ Mastra provides two tool call accuracy scorers for evaluating whether an LLM selects the correct tools from available options:
4
+
5
+ 1. **Code-based scorer** - Deterministic evaluation using exact tool matching
6
+ 2. **LLM-based scorer** - Semantic evaluation using AI to assess appropriateness
7
+
8
+ ## Choosing Between Scorers
9
+
10
+ ### Use the Code-Based Scorer When:
11
+
12
+ - You need **deterministic, reproducible** results
13
+ - You want to test **exact tool matching**
14
+ - You need to validate **specific tool sequences**
15
+ - Speed and cost are priorities (no LLM calls)
16
+ - You're running automated tests
17
+
18
+ ### Use the LLM-Based Scorer When:
19
+
20
+ - You need **semantic understanding** of appropriateness
21
+ - Tool selection depends on **context and intent**
22
+ - You want to handle **edge cases** like clarification requests
23
+ - You need **explanations** for scoring decisions
24
+ - You're evaluating **production agent behavior**
25
+
26
+ ## Code-Based Tool Call Accuracy Scorer
27
+
28
+ The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/prebuilt` provides deterministic binary scoring based on exact tool matching and supports both strict and lenient evaluation modes, as well as tool calling order validation.
29
+
30
+ ### Parameters
31
+
32
+ **expectedTool:** (`string`): The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.
33
+
34
+ **strictMode:** (`boolean`): Controls evaluation strictness. When `true` in single tool mode, the scorer returns `1` only if exactly one tool is called and it matches the expected tool. When `true` in order checking mode, tools must match the expected order exactly with no extra tools allowed.
35
+
36
+ **expectedToolOrder:** (`string[]`): Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.
37
+
38
+ This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
39
+
40
+ ### Evaluation Modes
41
+
42
+ The code-based scorer operates in two distinct modes:
43
+
44
+ #### Single Tool Mode
45
+
46
+ When `expectedToolOrder` is not provided, the scorer evaluates single tool selection:
47
+
48
+ - **Standard Mode (strictMode: false)**: Returns `1` if the expected tool is called, regardless of other tools
49
+ - **Strict Mode (strictMode: true)**: Returns `1` only if exactly one tool is called and it matches the expected tool
50
+
51
+ #### Order Checking Mode
52
+
53
+ When `expectedToolOrder` is provided, the scorer validates tool calling sequence:
54
+
55
+ - **Strict Order (strictMode: true)**: Tools must be called in exactly the specified order with no extra tools
56
+ - **Flexible Order (strictMode: false)**: Expected tools must appear in correct relative order (extra tools allowed)
57
+
58
+ ## Code-Based Scoring Details
59
+
60
+ - **Binary scores**: Always returns 0 or 1
61
+ - **Deterministic**: Same input always produces same output
62
+ - **Fast**: No external API calls
63
+
64
+ ### Code-Based Scorer Options
65
+
66
+ ```typescript
67
+ // Standard mode - passes if expected tool is called
68
+ const lenientScorer = createToolCallAccuracyScorerCode({
69
+ expectedTool: 'search-tool',
70
+ strictMode: false,
71
+ })
72
+
73
+ // Strict mode - only passes if exactly one tool is called
74
+ const strictScorer = createToolCallAccuracyScorerCode({
75
+ expectedTool: 'search-tool',
76
+ strictMode: true,
77
+ })
78
+
79
+ // Order checking with strict mode
80
+ const strictOrderScorer = createToolCallAccuracyScorerCode({
81
+ expectedTool: 'step1-tool',
82
+ expectedToolOrder: ['step1-tool', 'step2-tool', 'step3-tool'],
83
+ strictMode: true, // no extra tools allowed
84
+ })
85
+ ```
86
+
87
+ ### Code-Based Scorer Results
88
+
89
+ ```typescript
90
+ {
91
+ runId: string,
92
+ preprocessStepResult: {
93
+ expectedTool: string,
94
+ actualTools: string[],
95
+ strictMode: boolean,
96
+ expectedToolOrder?: string[],
97
+ hasToolCalls: boolean,
98
+ correctToolCalled: boolean,
99
+ correctOrderCalled: boolean | null,
100
+ toolCallInfos: ToolCallInfo[]
101
+ },
102
+ score: number // Always 0 or 1
103
+ }
104
+ ```
105
+
106
+ ## Code-Based Scorer Examples
107
+
108
+ The code-based scorer provides deterministic, binary scoring (0 or 1) based on exact tool matching.
109
+
110
+ ### Correct tool selection
111
+
112
+ ```typescript
113
+ const scorer = createToolCallAccuracyScorerCode({
114
+ expectedTool: 'weather-tool',
115
+ })
116
+
117
+ // Simulate LLM input and output with tool call
118
+ const inputMessages = [
119
+ createTestMessage({
120
+ content: 'What is the weather like in New York today?',
121
+ role: 'user',
122
+ id: 'input-1',
123
+ }),
124
+ ]
125
+
126
+ const output = [
127
+ createTestMessage({
128
+ content: 'Let me check the weather for you.',
129
+ role: 'assistant',
130
+ id: 'output-1',
131
+ toolInvocations: [
132
+ createToolInvocation({
133
+ toolCallId: 'call-123',
134
+ toolName: 'weather-tool',
135
+ args: { location: 'New York' },
136
+ result: { temperature: '72°F', condition: 'sunny' },
137
+ state: 'result',
138
+ }),
139
+ ],
140
+ }),
141
+ ]
142
+
143
+ const run = createAgentTestRun({ inputMessages, output })
144
+ const result = await scorer.run(run)
145
+
146
+ console.log(result.score) // 1
147
+ console.log(result.preprocessStepResult?.correctToolCalled) // true
148
+ ```
149
+
150
+ ### Strict mode evaluation
151
+
152
+ Only passes if exactly one tool is called:
153
+
154
+ ```typescript
155
+ const strictScorer = createToolCallAccuracyScorerCode({
156
+ expectedTool: 'weather-tool',
157
+ strictMode: true,
158
+ })
159
+
160
+ // Multiple tools called - fails in strict mode
161
+ const output = [
162
+ createTestMessage({
163
+ content: 'Let me help you with that.',
164
+ role: 'assistant',
165
+ id: 'output-1',
166
+ toolInvocations: [
167
+ createToolInvocation({
168
+ toolCallId: 'call-1',
169
+ toolName: 'search-tool',
170
+ args: {},
171
+ result: {},
172
+ state: 'result',
173
+ }),
174
+ createToolInvocation({
175
+ toolCallId: 'call-2',
176
+ toolName: 'weather-tool',
177
+ args: { location: 'New York' },
178
+ result: { temperature: '20°C' },
179
+ state: 'result',
180
+ }),
181
+ ],
182
+ }),
183
+ ]
184
+
185
+ const result = await strictScorer.run(run)
186
+ console.log(result.score) // 0 - fails because multiple tools were called
187
+ ```
188
+
189
+ ### Tool order validation
190
+
191
+ Validates that tools are called in a specific sequence:
192
+
193
+ ```typescript
194
+ const orderScorer = createToolCallAccuracyScorerCode({
195
+ expectedTool: 'auth-tool', // ignored when order is specified
196
+ expectedToolOrder: ['auth-tool', 'fetch-tool'],
197
+ strictMode: true, // no extra tools allowed
198
+ })
199
+
200
+ const output = [
201
+ createTestMessage({
202
+ content: 'I will authenticate and fetch the data.',
203
+ role: 'assistant',
204
+ id: 'output-1',
205
+ toolInvocations: [
206
+ createToolInvocation({
207
+ toolCallId: 'call-1',
208
+ toolName: 'auth-tool',
209
+ args: { token: 'abc123' },
210
+ result: { authenticated: true },
211
+ state: 'result',
212
+ }),
213
+ createToolInvocation({
214
+ toolCallId: 'call-2',
215
+ toolName: 'fetch-tool',
216
+ args: { endpoint: '/data' },
217
+ result: { data: ['item1'] },
218
+ state: 'result',
219
+ }),
220
+ ],
221
+ }),
222
+ ]
223
+
224
+ const result = await orderScorer.run(run)
225
+ console.log(result.score) // 1 - correct order
226
+ ```
227
+
228
+ ### Flexible order mode
229
+
230
+ Allows extra tools as long as expected tools maintain relative order:
231
+
232
+ ```typescript
233
+ const flexibleOrderScorer = createToolCallAccuracyScorerCode({
234
+ expectedTool: 'auth-tool',
235
+ expectedToolOrder: ['auth-tool', 'fetch-tool'],
236
+ strictMode: false, // allows extra tools
237
+ })
238
+
239
+ const output = [
240
+ createTestMessage({
241
+ content: 'Performing comprehensive operation.',
242
+ role: 'assistant',
243
+ id: 'output-1',
244
+ toolInvocations: [
245
+ createToolInvocation({
246
+ toolCallId: 'call-1',
247
+ toolName: 'auth-tool',
248
+ args: { token: 'abc123' },
249
+ result: { authenticated: true },
250
+ state: 'result',
251
+ }),
252
+ createToolInvocation({
253
+ toolCallId: 'call-2',
254
+ toolName: 'log-tool', // Extra tool - OK in flexible mode
255
+ args: { message: 'Starting fetch' },
256
+ result: { logged: true },
257
+ state: 'result',
258
+ }),
259
+ createToolInvocation({
260
+ toolCallId: 'call-3',
261
+ toolName: 'fetch-tool',
262
+ args: { endpoint: '/data' },
263
+ result: { data: ['item1'] },
264
+ state: 'result',
265
+ }),
266
+ ],
267
+ }),
268
+ ]
269
+
270
+ const result = await flexibleOrderScorer.run(run)
271
+ console.log(result.score) // 1 - auth-tool comes before fetch-tool
272
+ ```
273
+
274
+ ## LLM-Based Tool Call Accuracy Scorer
275
+
276
+ The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/prebuilt` uses an LLM to evaluate whether the tools called by an agent are appropriate for the given user request, providing semantic evaluation rather than exact matching.
277
+
278
+ ### Parameters
279
+
280
+ **model:** (`MastraModelConfig`): The LLM model to use for evaluating tool appropriateness
281
+
282
+ **availableTools:** (`Array<{name: string, description: string}>`): List of available tools with their descriptions for context
283
+
284
+ ### Features
285
+
286
+ The LLM-based scorer provides:
287
+
288
+ - **Semantic Evaluation**: Understands context and user intent
289
+ - **Appropriateness Assessment**: Distinguishes between "helpful" and "appropriate" tools
290
+ - **Clarification Handling**: Recognizes when agents appropriately ask for clarification
291
+ - **Missing Tool Detection**: Identifies tools that should have been called
292
+ - **Reasoning Generation**: Provides explanations for scoring decisions
293
+
294
+ ### Evaluation Process
295
+
296
+ 1. **Extract Tool Calls**: Identifies tools mentioned in agent output
297
+ 2. **Analyze Appropriateness**: Evaluates each tool against user request
298
+ 3. **Generate Score**: Calculates score based on appropriate vs total tool calls
299
+ 4. **Generate Reasoning**: Provides human-readable explanation
300
+
301
+ ## LLM-Based Scoring Details
302
+
303
+ - **Fractional scores**: Returns values between 0.0 and 1.0
304
+ - **Context-aware**: Considers user intent and appropriateness
305
+ - **Explanatory**: Provides reasoning for scores
306
+
307
+ ### LLM-Based Scorer Options
308
+
309
+ ```typescript
310
+ // Basic configuration
311
+ const basicLLMScorer = createToolCallAccuracyScorerLLM({
312
+ model: 'openai/gpt-5.1',
313
+ availableTools: [
314
+ { name: 'tool1', description: 'Description 1' },
315
+ { name: 'tool2', description: 'Description 2' }
316
+ ]
317
+ });
318
+
319
+ // With different model
320
+ const customModelScorer = createToolCallAccuracyScorerLLM({
321
+ model: 'openai/gpt-5', // More powerful model for complex evaluations
322
+ availableTools: [...]
323
+ });
324
+ ```
325
+
326
+ ### LLM-Based Scorer Results
327
+
328
+ ```typescript
329
+ {
330
+ runId: string,
331
+ score: number, // 0.0 to 1.0
332
+ reason: string, // Human-readable explanation
333
+ analyzeStepResult: {
334
+ evaluations: Array<{
335
+ toolCalled: string,
336
+ wasAppropriate: boolean,
337
+ reasoning: string
338
+ }>,
339
+ missingTools?: string[]
340
+ }
341
+ }
342
+ ```
343
+
344
+ ## LLM-Based Scorer Examples
345
+
346
+ The LLM-based scorer uses AI to evaluate whether tool selections are appropriate for the user's request.
347
+
348
+ ### Basic LLM evaluation
349
+
350
+ ```typescript
351
+ const llmScorer = createToolCallAccuracyScorerLLM({
352
+ model: 'openai/gpt-5.1',
353
+ availableTools: [
354
+ {
355
+ name: 'weather-tool',
356
+ description: 'Get current weather information for any location',
357
+ },
358
+ {
359
+ name: 'calendar-tool',
360
+ description: 'Check calendar events and scheduling',
361
+ },
362
+ {
363
+ name: 'search-tool',
364
+ description: 'Search the web for general information',
365
+ },
366
+ ],
367
+ })
368
+
369
+ const inputMessages = [
370
+ createTestMessage({
371
+ content: 'What is the weather like in San Francisco today?',
372
+ role: 'user',
373
+ id: 'input-1',
374
+ }),
375
+ ]
376
+
377
+ const output = [
378
+ createTestMessage({
379
+ content: 'Let me check the current weather for you.',
380
+ role: 'assistant',
381
+ id: 'output-1',
382
+ toolInvocations: [
383
+ createToolInvocation({
384
+ toolCallId: 'call-123',
385
+ toolName: 'weather-tool',
386
+ args: { location: 'San Francisco', date: 'today' },
387
+ result: { temperature: '68°F', condition: 'foggy' },
388
+ state: 'result',
389
+ }),
390
+ ],
391
+ }),
392
+ ]
393
+
394
+ const run = createAgentTestRun({ inputMessages, output })
395
+ const result = await llmScorer.run(run)
396
+
397
+ console.log(result.score) // 1.0 - appropriate tool usage
398
+ console.log(result.reason) // "The agent correctly used the weather-tool to address the user's request for weather information."
399
+ ```
400
+
401
+ ### Handling inappropriate tool usage
402
+
403
+ ```typescript
404
+ const inputMessages = [
405
+ createTestMessage({
406
+ content: 'What is the weather in Tokyo?',
407
+ role: 'user',
408
+ id: 'input-1',
409
+ }),
410
+ ]
411
+
412
+ const inappropriateOutput = [
413
+ createTestMessage({
414
+ content: 'Let me search for that information.',
415
+ role: 'assistant',
416
+ id: 'output-1',
417
+ toolInvocations: [
418
+ createToolInvocation({
419
+ toolCallId: 'call-456',
420
+ toolName: 'search-tool', // Less appropriate than weather-tool
421
+ args: { query: 'Tokyo weather' },
422
+ result: { results: ['Tokyo weather data...'] },
423
+ state: 'result',
424
+ }),
425
+ ],
426
+ }),
427
+ ]
428
+
429
+ const run = createAgentTestRun({ inputMessages, output: inappropriateOutput })
430
+ const result = await llmScorer.run(run)
431
+
432
+ console.log(result.score) // 0.5 - partially appropriate
433
+ console.log(result.reason) // "The agent used search-tool when weather-tool would have been more appropriate for a direct weather query."
434
+ ```
435
+
436
+ ### Evaluating clarification requests
437
+
438
+ The LLM scorer recognizes when agents appropriately ask for clarification:
439
+
440
+ ```typescript
441
+ const vagueInput = [
442
+ createTestMessage({
443
+ content: 'I need help with something',
444
+ role: 'user',
445
+ id: 'input-1'
446
+ })
447
+ ];
448
+
449
+ const clarificationOutput = [
450
+ createTestMessage({
451
+ content: "I'd be happy to help! Could you please provide more details about what you need assistance with?",
452
+ role: 'assistant',
453
+ id: 'output-1',
454
+ // No tools called - asking for clarification instead
455
+ })
456
+ ];
457
+
458
+ const run = createAgentTestRun({
459
+ inputMessages: vagueInput,
460
+ output: clarificationOutput
461
+ });
462
+ const result = await llmScorer.run(run);
463
+
464
+ console.log(result.score); // 1.0 - appropriate to ask for clarification
465
+ console.log(result.reason); // "The agent appropriately asked for clarification rather than calling tools with insufficient information."
466
+ ```
467
+
468
+ ## Comparing Both Scorers
469
+
470
+ Here's an example using both scorers on the same data:
471
+
472
+ ```typescript
473
+ import {
474
+ createToolCallAccuracyScorerCode as createCodeScorer,
475
+ createToolCallAccuracyScorerLLM as createLLMScorer,
476
+ } from '@mastra/evals/scorers/prebuilt'
477
+
478
+ // Setup both scorers
479
+ const codeScorer = createCodeScorer({
480
+ expectedTool: 'weather-tool',
481
+ strictMode: false,
482
+ })
483
+
484
+ const llmScorer = createLLMScorer({
485
+ model: 'openai/gpt-5.1',
486
+ availableTools: [
487
+ { name: 'weather-tool', description: 'Get weather information' },
488
+ { name: 'search-tool', description: 'Search the web' },
489
+ ],
490
+ })
491
+
492
+ // Test data
493
+ const run = createAgentTestRun({
494
+ inputMessages: [
495
+ createTestMessage({
496
+ content: 'What is the weather?',
497
+ role: 'user',
498
+ id: 'input-1',
499
+ }),
500
+ ],
501
+ output: [
502
+ createTestMessage({
503
+ content: 'Let me find that information.',
504
+ role: 'assistant',
505
+ id: 'output-1',
506
+ toolInvocations: [
507
+ createToolInvocation({
508
+ toolCallId: 'call-1',
509
+ toolName: 'search-tool',
510
+ args: { query: 'weather' },
511
+ result: { results: ['weather data'] },
512
+ state: 'result',
513
+ }),
514
+ ],
515
+ }),
516
+ ],
517
+ })
518
+
519
+ // Run both scorers
520
+ const codeResult = await codeScorer.run(run)
521
+ const llmResult = await llmScorer.run(run)
522
+
523
+ console.log('Code Scorer:', codeResult.score) // 0 - wrong tool
524
+ console.log('LLM Scorer:', llmResult.score) // 0.3 - partially appropriate
525
+ console.log('LLM Reason:', llmResult.reason) // Explains why search-tool is less appropriate
526
+ ```
527
+
528
+ ## Related
529
+
530
+ - [Answer Relevancy Scorer](https://mastra.ai/reference/evals/answer-relevancy)
531
+ - [Completeness Scorer](https://mastra.ai/reference/evals/completeness)
532
+ - [Faithfulness Scorer](https://mastra.ai/reference/evals/faithfulness)
533
+ - [Custom Scorers](https://mastra.ai/docs/evals/custom-scorers)