@mastra/mcp-docs-server 0.13.39 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (494)
  1. package/.docs/organized/changelogs/%40internal%2Fai-sdk-v4.md +1 -0
  2. package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +0 -10
  3. package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +0 -10
  4. package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +36 -36
  5. package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +0 -10
  6. package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +70 -70
  7. package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +40 -40
  8. package/.docs/organized/changelogs/%40mastra%2Fastra.md +19 -19
  9. package/.docs/organized/changelogs/%40mastra%2Fauth.md +4 -14
  10. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +18 -18
  11. package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +199 -199
  12. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +220 -220
  13. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +190 -190
  14. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +199 -199
  15. package/.docs/organized/changelogs/%40mastra%2Fcodemod.md +7 -0
  16. package/.docs/organized/changelogs/%40mastra%2Fcore.md +210 -210
  17. package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +16 -16
  18. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +69 -69
  19. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +67 -67
  20. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +70 -70
  21. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +67 -67
  22. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +209 -209
  23. package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +191 -191
  24. package/.docs/organized/changelogs/%40mastra%2Fevals.md +34 -34
  25. package/.docs/organized/changelogs/%40mastra%2Ffastembed.md +5 -13
  26. package/.docs/organized/changelogs/%40mastra%2Flance.md +182 -182
  27. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +199 -199
  28. package/.docs/organized/changelogs/%40mastra%2Floggers.md +20 -20
  29. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +56 -56
  30. package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +20 -20
  31. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +65 -65
  32. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +228 -228
  33. package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +199 -199
  34. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +206 -206
  35. package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +19 -19
  36. package/.docs/organized/changelogs/%40mastra%2Fpg.md +197 -197
  37. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +16 -16
  38. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +216 -216
  39. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +16 -16
  40. package/.docs/organized/changelogs/%40mastra%2Frag.md +61 -61
  41. package/.docs/organized/changelogs/%40mastra%2Freact.md +66 -66
  42. package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -17
  43. package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +6 -30
  44. package/.docs/organized/changelogs/%40mastra%2Fserver.md +203 -203
  45. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +16 -16
  46. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +190 -190
  47. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +18 -18
  48. package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +21 -21
  49. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +20 -20
  50. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +20 -20
  51. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +20 -20
  52. package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +20 -20
  53. package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +56 -56
  54. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +20 -20
  55. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +20 -20
  56. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +56 -56
  57. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +20 -20
  58. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +20 -20
  59. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +20 -20
  60. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +20 -20
  61. package/.docs/organized/changelogs/create-mastra.md +29 -29
  62. package/.docs/organized/changelogs/mastra.md +93 -93
  63. package/.docs/organized/code-examples/a2a.md +4 -2
  64. package/.docs/organized/code-examples/agui.md +12 -9
  65. package/.docs/organized/code-examples/ai-sdk-useChat.md +12 -18
  66. package/.docs/organized/code-examples/ai-sdk-v5.md +4 -2
  67. package/.docs/organized/code-examples/bird-checker-with-express.md +5 -4
  68. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +4 -3
  69. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +4 -3
  70. package/.docs/organized/code-examples/client-side-tools.md +1 -0
  71. package/.docs/organized/code-examples/crypto-chatbot.md +1 -1
  72. package/.docs/organized/code-examples/experimental-auth-weather-agent.md +8 -177
  73. package/.docs/organized/code-examples/fireworks-r1.md +2 -2
  74. package/.docs/organized/code-examples/heads-up-game.md +10 -7
  75. package/.docs/organized/code-examples/mcp-configuration.md +5 -3
  76. package/.docs/organized/code-examples/mcp-registry-registry.md +3 -2
  77. package/.docs/organized/code-examples/memory-per-resource-example.md +4 -2
  78. package/.docs/organized/code-examples/memory-todo-agent.md +1 -0
  79. package/.docs/organized/code-examples/memory-with-context.md +2 -1
  80. package/.docs/organized/code-examples/memory-with-libsql.md +4 -2
  81. package/.docs/organized/code-examples/memory-with-mongodb.md +4 -2
  82. package/.docs/organized/code-examples/memory-with-pg.md +4 -2
  83. package/.docs/organized/code-examples/memory-with-processors.md +13 -8
  84. package/.docs/organized/code-examples/memory-with-upstash.md +5 -3
  85. package/.docs/organized/code-examples/openapi-spec-writer.md +32 -41
  86. package/.docs/organized/code-examples/quick-start.md +5 -32
  87. package/.docs/organized/code-examples/stock-price-tool.md +6 -5
  88. package/.docs/organized/code-examples/weather-agent.md +21 -16
  89. package/.docs/organized/code-examples/workflow-ai-recruiter.md +3 -2
  90. package/.docs/organized/code-examples/workflow-with-inline-steps.md +9 -12
  91. package/.docs/organized/code-examples/workflow-with-memory.md +16 -15
  92. package/.docs/organized/code-examples/workflow-with-separate-steps.md +2 -2
  93. package/.docs/organized/code-examples/workflow-with-suspend-resume.md +3 -2
  94. package/.docs/raw/agents/adding-voice.mdx +27 -22
  95. package/.docs/raw/agents/agent-memory.mdx +24 -16
  96. package/.docs/raw/agents/guardrails.mdx +33 -12
  97. package/.docs/raw/agents/networks.mdx +8 -4
  98. package/.docs/raw/agents/overview.mdx +23 -17
  99. package/.docs/raw/agents/using-tools.mdx +11 -8
  100. package/.docs/raw/auth/auth0.mdx +9 -9
  101. package/.docs/raw/auth/clerk.mdx +7 -7
  102. package/.docs/raw/auth/firebase.mdx +9 -9
  103. package/.docs/raw/auth/index.mdx +6 -6
  104. package/.docs/raw/auth/jwt.mdx +7 -7
  105. package/.docs/raw/auth/supabase.mdx +8 -8
  106. package/.docs/raw/auth/workos.mdx +9 -9
  107. package/.docs/raw/community/contributing-templates.mdx +3 -3
  108. package/.docs/raw/community/discord.mdx +1 -1
  109. package/.docs/raw/course/01-first-agent/03-verifying-installation.md +1 -1
  110. package/.docs/raw/course/01-first-agent/08-exporting-your-agent.md +2 -1
  111. package/.docs/raw/course/01-first-agent/16-adding-memory-to-agent.md +2 -1
  112. package/.docs/raw/course/02-agent-tools-mcp/02-installing-mcp.md +1 -1
  113. package/.docs/raw/course/02-agent-tools-mcp/31-enhancing-memory-configuration.md +2 -0
  114. package/.docs/raw/course/03-agent-memory/03-installing-memory.md +1 -1
  115. package/.docs/raw/course/03-agent-memory/04-creating-basic-memory-agent.md +1 -0
  116. package/.docs/raw/course/03-agent-memory/10-storage-configuration.md +2 -3
  117. package/.docs/raw/course/03-agent-memory/13-vector-store-configuration.md +2 -0
  118. package/.docs/raw/course/03-agent-memory/16-configuring-semantic-recall.md +2 -0
  119. package/.docs/raw/course/03-agent-memory/18-advanced-configuration-semantic-recall.md +1 -0
  120. package/.docs/raw/course/03-agent-memory/21-configuring-working-memory.md +2 -0
  121. package/.docs/raw/course/03-agent-memory/22-custom-working-memory-templates.md +1 -0
  122. package/.docs/raw/course/03-agent-memory/25-combining-memory-features.md +1 -0
  123. package/.docs/raw/course/03-agent-memory/27-creating-learning-assistant.md +1 -0
  124. package/.docs/raw/course/04-workflows/08-running-workflows-programmatically.md +2 -2
  125. package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +6 -6
  126. package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +8 -6
  127. package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +5 -5
  128. package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +5 -5
  129. package/.docs/raw/deployment/cloud-providers/index.mdx +11 -8
  130. package/.docs/raw/deployment/monorepo.mdx +2 -2
  131. package/.docs/raw/deployment/overview.mdx +2 -2
  132. package/.docs/raw/deployment/server-deployment.mdx +2 -10
  133. package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +5 -5
  134. package/.docs/raw/deployment/serverless-platforms/index.mdx +10 -7
  135. package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +5 -5
  136. package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +5 -5
  137. package/.docs/raw/deployment/web-framework.mdx +8 -8
  138. package/.docs/raw/{scorers → evals}/custom-scorers.mdx +6 -6
  139. package/.docs/raw/evals/off-the-shelf-scorers.mdx +50 -0
  140. package/.docs/raw/{scorers → evals}/overview.mdx +9 -9
  141. package/.docs/raw/evals/running-in-ci.mdx +113 -0
  142. package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +26 -25
  143. package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +1 -1
  144. package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +17 -17
  145. package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +4 -1
  146. package/.docs/raw/frameworks/servers/express.mdx +11 -10
  147. package/.docs/raw/frameworks/web-frameworks/astro.mdx +18 -18
  148. package/.docs/raw/frameworks/web-frameworks/next-js.mdx +7 -7
  149. package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +16 -16
  150. package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +7 -7
  151. package/.docs/raw/getting-started/installation.mdx +26 -25
  152. package/.docs/raw/getting-started/mcp-docs-server.mdx +1 -1
  153. package/.docs/raw/getting-started/project-structure.mdx +4 -4
  154. package/.docs/raw/getting-started/studio.mdx +8 -8
  155. package/.docs/raw/getting-started/templates.mdx +6 -6
  156. package/.docs/raw/guides/guide/ai-recruiter.mdx +264 -0
  157. package/.docs/raw/guides/guide/chef-michel.mdx +271 -0
  158. package/.docs/raw/guides/guide/notes-mcp-server.mdx +450 -0
  159. package/.docs/raw/guides/guide/research-assistant.mdx +380 -0
  160. package/.docs/raw/guides/guide/stock-agent.mdx +185 -0
  161. package/.docs/raw/guides/guide/web-search.mdx +291 -0
  162. package/.docs/raw/guides/index.mdx +43 -0
  163. package/.docs/raw/guides/migrations/agentnetwork.mdx +114 -0
  164. package/.docs/raw/guides/migrations/upgrade-to-v1/_template.mdx +50 -0
  165. package/.docs/raw/guides/migrations/upgrade-to-v1/agent.mdx +265 -0
  166. package/.docs/raw/guides/migrations/upgrade-to-v1/cli.mdx +48 -0
  167. package/.docs/raw/guides/migrations/upgrade-to-v1/client.mdx +153 -0
  168. package/.docs/raw/guides/migrations/upgrade-to-v1/evals.mdx +230 -0
  169. package/.docs/raw/guides/migrations/upgrade-to-v1/mastra.mdx +171 -0
  170. package/.docs/raw/guides/migrations/upgrade-to-v1/mcp.mdx +114 -0
  171. package/.docs/raw/guides/migrations/upgrade-to-v1/memory.mdx +241 -0
  172. package/.docs/raw/guides/migrations/upgrade-to-v1/overview.mdx +83 -0
  173. package/.docs/raw/guides/migrations/upgrade-to-v1/processors.mdx +62 -0
  174. package/.docs/raw/guides/migrations/upgrade-to-v1/storage.mdx +270 -0
  175. package/.docs/raw/guides/migrations/upgrade-to-v1/tools.mdx +115 -0
  176. package/.docs/raw/guides/migrations/upgrade-to-v1/tracing.mdx +280 -0
  177. package/.docs/raw/guides/migrations/upgrade-to-v1/vectors.mdx +23 -0
  178. package/.docs/raw/guides/migrations/upgrade-to-v1/voice.mdx +39 -0
  179. package/.docs/raw/guides/migrations/upgrade-to-v1/workflows.mdx +178 -0
  180. package/.docs/raw/guides/migrations/vnext-to-standard-apis.mdx +367 -0
  181. package/.docs/raw/guides/quickstarts/nextjs.mdx +275 -0
  182. package/.docs/raw/index.mdx +9 -9
  183. package/.docs/raw/{observability/logging.mdx → logging.mdx} +4 -4
  184. package/.docs/raw/mastra-cloud/dashboard.mdx +2 -2
  185. package/.docs/raw/mastra-cloud/observability.mdx +6 -6
  186. package/.docs/raw/mastra-cloud/overview.mdx +2 -2
  187. package/.docs/raw/mastra-cloud/setting-up.mdx +4 -4
  188. package/.docs/raw/memory/conversation-history.mdx +1 -0
  189. package/.docs/raw/memory/memory-processors.mdx +4 -3
  190. package/.docs/raw/memory/overview.mdx +10 -6
  191. package/.docs/raw/memory/semantic-recall.mdx +13 -8
  192. package/.docs/raw/memory/storage/memory-with-libsql.mdx +12 -7
  193. package/.docs/raw/memory/storage/memory-with-pg.mdx +11 -6
  194. package/.docs/raw/memory/storage/memory-with-upstash.mdx +11 -6
  195. package/.docs/raw/memory/threads-and-resources.mdx +11 -13
  196. package/.docs/raw/memory/working-memory.mdx +30 -14
  197. package/.docs/raw/observability/overview.mdx +13 -30
  198. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/arize.mdx +11 -19
  199. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/braintrust.mdx +8 -17
  200. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/cloud.mdx +11 -17
  201. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/default.mdx +16 -20
  202. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/langfuse.mdx +8 -17
  203. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/langsmith.mdx +8 -17
  204. package/.docs/raw/observability/{ai-tracing → tracing}/exporters/otel.mdx +12 -21
  205. package/.docs/raw/observability/{ai-tracing → tracing}/overview.mdx +107 -142
  206. package/.docs/raw/observability/{ai-tracing → tracing}/processors/sensitive-data-filter.mdx +14 -13
  207. package/.docs/raw/rag/chunking-and-embedding.mdx +5 -5
  208. package/.docs/raw/rag/overview.mdx +3 -13
  209. package/.docs/raw/rag/retrieval.mdx +24 -12
  210. package/.docs/raw/rag/vector-databases.mdx +7 -1
  211. package/.docs/raw/reference/agents/agent.mdx +35 -30
  212. package/.docs/raw/reference/agents/generate.mdx +10 -10
  213. package/.docs/raw/reference/agents/generateLegacy.mdx +8 -8
  214. package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +21 -15
  215. package/.docs/raw/reference/agents/getDefaultOptions.mdx +69 -0
  216. package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +22 -16
  217. package/.docs/raw/reference/agents/getDescription.mdx +1 -1
  218. package/.docs/raw/reference/agents/getInstructions.mdx +8 -8
  219. package/.docs/raw/reference/agents/getLLM.mdx +9 -9
  220. package/.docs/raw/reference/agents/getMemory.mdx +9 -9
  221. package/.docs/raw/reference/agents/getModel.mdx +10 -10
  222. package/.docs/raw/reference/agents/getVoice.mdx +8 -8
  223. package/.docs/raw/reference/agents/listAgents.mdx +9 -9
  224. package/.docs/raw/reference/agents/listScorers.mdx +7 -7
  225. package/.docs/raw/reference/agents/listTools.mdx +7 -7
  226. package/.docs/raw/reference/agents/listWorkflows.mdx +7 -7
  227. package/.docs/raw/reference/agents/network.mdx +11 -10
  228. package/.docs/raw/reference/auth/auth0.mdx +4 -4
  229. package/.docs/raw/reference/auth/clerk.mdx +4 -4
  230. package/.docs/raw/reference/auth/firebase.mdx +6 -6
  231. package/.docs/raw/reference/auth/jwt.mdx +4 -4
  232. package/.docs/raw/reference/auth/supabase.mdx +4 -4
  233. package/.docs/raw/reference/auth/workos.mdx +4 -4
  234. package/.docs/raw/reference/cli/create-mastra.mdx +10 -10
  235. package/.docs/raw/reference/cli/mastra.mdx +7 -7
  236. package/.docs/raw/reference/client-js/agents.mdx +6 -2
  237. package/.docs/raw/reference/client-js/mastra-client.mdx +7 -7
  238. package/.docs/raw/reference/client-js/memory.mdx +24 -16
  239. package/.docs/raw/reference/client-js/observability.mdx +11 -11
  240. package/.docs/raw/reference/client-js/workflows.mdx +6 -34
  241. package/.docs/raw/reference/core/getAgent.mdx +1 -1
  242. package/.docs/raw/reference/core/getAgentById.mdx +1 -1
  243. package/.docs/raw/reference/core/getDeployer.mdx +2 -2
  244. package/.docs/raw/reference/core/getLogger.mdx +2 -2
  245. package/.docs/raw/reference/core/getMCPServer.mdx +31 -15
  246. package/.docs/raw/reference/core/getMCPServerById.mdx +81 -0
  247. package/.docs/raw/reference/core/getScorer.mdx +3 -3
  248. package/.docs/raw/reference/core/getScorerById.mdx +79 -0
  249. package/.docs/raw/reference/core/getServer.mdx +2 -2
  250. package/.docs/raw/reference/core/getStorage.mdx +2 -2
  251. package/.docs/raw/reference/core/getTelemetry.mdx +2 -2
  252. package/.docs/raw/reference/core/getVector.mdx +2 -2
  253. package/.docs/raw/reference/core/getWorkflow.mdx +1 -1
  254. package/.docs/raw/reference/core/listAgents.mdx +1 -1
  255. package/.docs/raw/reference/core/listLogs.mdx +2 -2
  256. package/.docs/raw/reference/core/listLogsByRunId.mdx +2 -2
  257. package/.docs/raw/reference/core/listMCPServers.mdx +65 -0
  258. package/.docs/raw/reference/core/listScorers.mdx +3 -3
  259. package/.docs/raw/reference/core/listVectors.mdx +36 -0
  260. package/.docs/raw/reference/core/listWorkflows.mdx +6 -6
  261. package/.docs/raw/reference/core/mastra-class.mdx +3 -2
  262. package/.docs/raw/reference/core/setLogger.mdx +2 -2
  263. package/.docs/raw/reference/core/setStorage.mdx +3 -2
  264. package/.docs/raw/reference/core/setTelemetry.mdx +2 -2
  265. package/.docs/raw/reference/deployer/cloudflare.mdx +2 -2
  266. package/.docs/raw/reference/deployer/deployer.mdx +0 -6
  267. package/.docs/raw/reference/deployer/netlify.mdx +2 -2
  268. package/.docs/raw/reference/deployer/vercel.mdx +3 -3
  269. package/.docs/raw/reference/evals/answer-relevancy.mdx +164 -126
  270. package/.docs/raw/reference/{scorers → evals}/answer-similarity.mdx +27 -27
  271. package/.docs/raw/reference/evals/bias.mdx +149 -115
  272. package/.docs/raw/reference/evals/completeness.mdx +148 -117
  273. package/.docs/raw/reference/evals/content-similarity.mdx +126 -113
  274. package/.docs/raw/reference/evals/context-precision.mdx +290 -133
  275. package/.docs/raw/reference/{scorers → evals}/context-relevance.mdx +6 -6
  276. package/.docs/raw/reference/{scorers → evals}/create-scorer.mdx +69 -60
  277. package/.docs/raw/reference/evals/faithfulness.mdx +163 -121
  278. package/.docs/raw/reference/evals/hallucination.mdx +159 -132
  279. package/.docs/raw/reference/evals/keyword-coverage.mdx +169 -125
  280. package/.docs/raw/reference/{scorers → evals}/mastra-scorer.mdx +7 -5
  281. package/.docs/raw/reference/{scorers → evals}/noise-sensitivity.mdx +9 -9
  282. package/.docs/raw/reference/evals/prompt-alignment.mdx +604 -182
  283. package/.docs/raw/reference/{scorers/run-experiment.mdx → evals/run-evals.mdx} +17 -18
  284. package/.docs/raw/reference/evals/textual-difference.mdx +149 -117
  285. package/.docs/raw/reference/evals/tone-consistency.mdx +149 -125
  286. package/.docs/raw/reference/{scorers → evals}/tool-call-accuracy.mdx +8 -6
  287. package/.docs/raw/reference/evals/toxicity.mdx +152 -96
  288. package/.docs/raw/reference/{observability/logging → logging}/pino-logger.mdx +2 -2
  289. package/.docs/raw/reference/memory/createThread.mdx +5 -5
  290. package/.docs/raw/reference/memory/deleteMessages.mdx +7 -7
  291. package/.docs/raw/reference/memory/getThreadById.mdx +4 -4
  292. package/.docs/raw/reference/memory/listThreadsByResourceId.mdx +110 -0
  293. package/.docs/raw/reference/memory/memory-class.mdx +13 -9
  294. package/.docs/raw/reference/memory/query.mdx +58 -57
  295. package/.docs/raw/reference/memory/recall.mdx +185 -0
  296. package/.docs/raw/reference/observability/tracing/configuration.mdx +245 -0
  297. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/arize.mdx +13 -13
  298. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/braintrust.mdx +11 -8
  299. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/cloud-exporter.mdx +21 -19
  300. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/console-exporter.mdx +49 -17
  301. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/default-exporter.mdx +42 -41
  302. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/langfuse.mdx +10 -7
  303. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/langsmith.mdx +10 -7
  304. package/.docs/raw/reference/observability/{ai-tracing → tracing}/exporters/otel.mdx +5 -5
  305. package/.docs/raw/reference/observability/tracing/instances.mdx +168 -0
  306. package/.docs/raw/reference/observability/{ai-tracing → tracing}/interfaces.mdx +115 -89
  307. package/.docs/raw/reference/observability/{ai-tracing → tracing}/processors/sensitive-data-filter.mdx +3 -3
  308. package/.docs/raw/reference/observability/{ai-tracing/span.mdx → tracing/spans.mdx} +59 -41
  309. package/.docs/raw/reference/processors/batch-parts-processor.mdx +9 -3
  310. package/.docs/raw/reference/processors/language-detector.mdx +9 -3
  311. package/.docs/raw/reference/processors/moderation-processor.mdx +9 -3
  312. package/.docs/raw/reference/processors/pii-detector.mdx +9 -3
  313. package/.docs/raw/reference/processors/prompt-injection-detector.mdx +9 -3
  314. package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +9 -3
  315. package/.docs/raw/reference/processors/token-limiter-processor.mdx +9 -3
  316. package/.docs/raw/reference/processors/unicode-normalizer.mdx +9 -3
  317. package/.docs/raw/reference/rag/chunk.mdx +1 -8
  318. package/.docs/raw/reference/rag/database-config.mdx +7 -7
  319. package/.docs/raw/reference/rag/metadata-filters.mdx +14 -11
  320. package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
  321. package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
  322. package/.docs/raw/reference/storage/dynamodb.mdx +3 -3
  323. package/.docs/raw/reference/storage/lance.mdx +1 -1
  324. package/.docs/raw/reference/storage/libsql.mdx +3 -1
  325. package/.docs/raw/reference/storage/mongodb.mdx +1 -1
  326. package/.docs/raw/reference/storage/mssql.mdx +6 -1
  327. package/.docs/raw/reference/storage/postgresql.mdx +7 -1
  328. package/.docs/raw/reference/storage/upstash.mdx +2 -1
  329. package/.docs/raw/reference/streaming/agents/stream.mdx +12 -12
  330. package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +8 -8
  331. package/.docs/raw/reference/streaming/workflows/observeStream.mdx +3 -3
  332. package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +3 -3
  333. package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +6 -6
  334. package/.docs/raw/reference/streaming/workflows/stream.mdx +10 -10
  335. package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +11 -11
  336. package/.docs/raw/reference/templates/overview.mdx +3 -3
  337. package/.docs/raw/reference/tools/create-tool.mdx +52 -35
  338. package/.docs/raw/reference/tools/graph-rag-tool.mdx +15 -15
  339. package/.docs/raw/reference/tools/mcp-client.mdx +1 -1
  340. package/.docs/raw/reference/tools/mcp-server.mdx +119 -35
  341. package/.docs/raw/reference/tools/vector-query-tool.mdx +27 -26
  342. package/.docs/raw/reference/vectors/couchbase.mdx +8 -2
  343. package/.docs/raw/reference/vectors/libsql.mdx +2 -1
  344. package/.docs/raw/reference/vectors/mongodb.mdx +7 -1
  345. package/.docs/raw/reference/vectors/pg.mdx +3 -0
  346. package/.docs/raw/reference/vectors/s3vectors.mdx +1 -1
  347. package/.docs/raw/reference/vectors/upstash.mdx +1 -0
  348. package/.docs/raw/reference/voice/google-gemini-live.mdx +1 -1
  349. package/.docs/raw/reference/voice/voice.addTools.mdx +3 -3
  350. package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -4
  351. package/.docs/raw/reference/workflows/run-methods/resume.mdx +14 -14
  352. package/.docs/raw/reference/workflows/run-methods/start.mdx +17 -17
  353. package/.docs/raw/reference/workflows/run.mdx +1 -8
  354. package/.docs/raw/reference/workflows/step.mdx +5 -5
  355. package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
  356. package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
  357. package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -13
  358. package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +1 -1
  359. package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +1 -1
  360. package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
  361. package/.docs/raw/reference/workflows/workflow-methods/map.mdx +5 -0
  362. package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
  363. package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +2 -2
  364. package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +1 -1
  365. package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +1 -1
  366. package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
  367. package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +1 -1
  368. package/.docs/raw/reference/workflows/workflow.mdx +1 -1
  369. package/.docs/raw/server-db/custom-api-routes.mdx +2 -2
  370. package/.docs/raw/server-db/mastra-client.mdx +23 -22
  371. package/.docs/raw/server-db/middleware.mdx +7 -7
  372. package/.docs/raw/server-db/production-server.mdx +4 -4
  373. package/.docs/raw/server-db/{runtime-context.mdx → request-context.mdx} +46 -45
  374. package/.docs/raw/server-db/storage.mdx +29 -21
  375. package/.docs/raw/streaming/events.mdx +3 -3
  376. package/.docs/raw/streaming/overview.mdx +5 -5
  377. package/.docs/raw/streaming/tool-streaming.mdx +18 -17
  378. package/.docs/raw/streaming/workflow-streaming.mdx +1 -1
  379. package/.docs/raw/tools-mcp/advanced-usage.mdx +5 -4
  380. package/.docs/raw/tools-mcp/mcp-overview.mdx +33 -20
  381. package/.docs/raw/tools-mcp/overview.mdx +11 -11
  382. package/.docs/raw/voice/overview.mdx +63 -43
  383. package/.docs/raw/voice/speech-to-speech.mdx +5 -3
  384. package/.docs/raw/voice/speech-to-text.mdx +10 -9
  385. package/.docs/raw/voice/text-to-speech.mdx +13 -12
  386. package/.docs/raw/workflows/agents-and-tools.mdx +9 -5
  387. package/.docs/raw/workflows/control-flow.mdx +3 -3
  388. package/.docs/raw/workflows/error-handling.mdx +2 -21
  389. package/.docs/raw/workflows/human-in-the-loop.mdx +7 -4
  390. package/.docs/raw/workflows/inngest-workflow.mdx +3 -3
  391. package/.docs/raw/workflows/input-data-mapping.mdx +107 -0
  392. package/.docs/raw/workflows/overview.mdx +17 -16
  393. package/.docs/raw/workflows/snapshots.mdx +13 -11
  394. package/.docs/raw/workflows/suspend-and-resume.mdx +23 -15
  395. package/CHANGELOG.md +55 -53
  396. package/README.md +11 -2
  397. package/dist/{chunk-TUAHUTTB.js → chunk-5NJC7NRO.js} +3 -0
  398. package/dist/index.d.ts.map +1 -1
  399. package/dist/prepare-docs/copy-raw.d.ts.map +1 -1
  400. package/dist/prepare-docs/prepare.js +1 -1
  401. package/dist/prompts/migration.d.ts +6 -0
  402. package/dist/prompts/migration.d.ts.map +1 -0
  403. package/dist/stdio.js +402 -30
  404. package/dist/tools/migration.d.ts +40 -0
  405. package/dist/tools/migration.d.ts.map +1 -0
  406. package/package.json +8 -12
  407. package/.docs/organized/changelogs/%40mastra%2Fcloud.md +0 -302
  408. package/.docs/raw/observability/nextjs-tracing.mdx +0 -109
  409. package/.docs/raw/observability/otel-tracing.mdx +0 -189
  410. package/.docs/raw/reference/agents/getScorers.mdx +0 -69
  411. package/.docs/raw/reference/agents/getTools.mdx +0 -69
  412. package/.docs/raw/reference/agents/getWorkflows.mdx +0 -69
  413. package/.docs/raw/reference/client-js/workflows-legacy.mdx +0 -143
  414. package/.docs/raw/reference/core/getAgents.mdx +0 -35
  415. package/.docs/raw/reference/core/getLogs.mdx +0 -96
  416. package/.docs/raw/reference/core/getLogsByRunId.mdx +0 -87
  417. package/.docs/raw/reference/core/getMCPServers.mdx +0 -36
  418. package/.docs/raw/reference/core/getMemory.mdx +0 -36
  419. package/.docs/raw/reference/core/getScorerByName.mdx +0 -78
  420. package/.docs/raw/reference/core/getScorers.mdx +0 -43
  421. package/.docs/raw/reference/core/getVectors.mdx +0 -36
  422. package/.docs/raw/reference/core/getWorkflows.mdx +0 -45
  423. package/.docs/raw/reference/evals/context-position.mdx +0 -197
  424. package/.docs/raw/reference/evals/context-relevancy.mdx +0 -196
  425. package/.docs/raw/reference/evals/contextual-recall.mdx +0 -196
  426. package/.docs/raw/reference/evals/summarization.mdx +0 -212
  427. package/.docs/raw/reference/legacyWorkflows/after.mdx +0 -89
  428. package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +0 -79
  429. package/.docs/raw/reference/legacyWorkflows/commit.mdx +0 -33
  430. package/.docs/raw/reference/legacyWorkflows/createRun.mdx +0 -76
  431. package/.docs/raw/reference/legacyWorkflows/else.mdx +0 -68
  432. package/.docs/raw/reference/legacyWorkflows/events.mdx +0 -305
  433. package/.docs/raw/reference/legacyWorkflows/execute.mdx +0 -110
  434. package/.docs/raw/reference/legacyWorkflows/if.mdx +0 -108
  435. package/.docs/raw/reference/legacyWorkflows/resume.mdx +0 -158
  436. package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +0 -133
  437. package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +0 -207
  438. package/.docs/raw/reference/legacyWorkflows/start.mdx +0 -87
  439. package/.docs/raw/reference/legacyWorkflows/step-class.mdx +0 -100
  440. package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +0 -137
  441. package/.docs/raw/reference/legacyWorkflows/step-function.mdx +0 -93
  442. package/.docs/raw/reference/legacyWorkflows/step-options.mdx +0 -69
  443. package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +0 -196
  444. package/.docs/raw/reference/legacyWorkflows/suspend.mdx +0 -70
  445. package/.docs/raw/reference/legacyWorkflows/then.mdx +0 -72
  446. package/.docs/raw/reference/legacyWorkflows/until.mdx +0 -168
  447. package/.docs/raw/reference/legacyWorkflows/watch.mdx +0 -124
  448. package/.docs/raw/reference/legacyWorkflows/while.mdx +0 -168
  449. package/.docs/raw/reference/legacyWorkflows/workflow.mdx +0 -234
  450. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +0 -79
  451. package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +0 -110
  452. package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +0 -185
  453. package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +0 -238
  454. package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +0 -117
  455. package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +0 -81
  456. package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +0 -121
  457. package/.docs/raw/reference/observability/otel-tracing/providers/braintrust.mdx +0 -40
  458. package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +0 -40
  459. package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +0 -20
  460. package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +0 -73
  461. package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +0 -41
  462. package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +0 -84
  463. package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +0 -48
  464. package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +0 -43
  465. package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +0 -40
  466. package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +0 -40
  467. package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +0 -40
  468. package/.docs/raw/reference/scorers/answer-relevancy.mdx +0 -227
  469. package/.docs/raw/reference/scorers/bias.mdx +0 -228
  470. package/.docs/raw/reference/scorers/completeness.mdx +0 -214
  471. package/.docs/raw/reference/scorers/content-similarity.mdx +0 -197
  472. package/.docs/raw/reference/scorers/context-precision.mdx +0 -352
  473. package/.docs/raw/reference/scorers/faithfulness.mdx +0 -241
  474. package/.docs/raw/reference/scorers/hallucination.mdx +0 -252
  475. package/.docs/raw/reference/scorers/keyword-coverage.mdx +0 -229
  476. package/.docs/raw/reference/scorers/prompt-alignment.mdx +0 -668
  477. package/.docs/raw/reference/scorers/textual-difference.mdx +0 -203
  478. package/.docs/raw/reference/scorers/tone-consistency.mdx +0 -211
  479. package/.docs/raw/reference/scorers/toxicity.mdx +0 -228
  480. package/.docs/raw/reference/workflows/run-methods/watch.mdx +0 -73
  481. package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +0 -24
  482. package/.docs/raw/scorers/evals-old-api/overview.mdx +0 -106
  483. package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +0 -85
  484. package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +0 -58
  485. package/.docs/raw/scorers/off-the-shelf-scorers.mdx +0 -50
  486. package/.docs/raw/workflows-legacy/control-flow.mdx +0 -774
  487. package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +0 -239
  488. package/.docs/raw/workflows-legacy/error-handling.mdx +0 -187
  489. package/.docs/raw/workflows-legacy/nested-workflows.mdx +0 -360
  490. package/.docs/raw/workflows-legacy/overview.mdx +0 -182
  491. package/.docs/raw/workflows-legacy/runtime-variables.mdx +0 -156
  492. package/.docs/raw/workflows-legacy/steps.mdx +0 -115
  493. package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +0 -406
  494. package/.docs/raw/workflows-legacy/variables.mdx +0 -318
@@ -1,246 +1,668 @@
  ---
- title: "Reference: PromptAlignmentMetric | Evals | Mastra Docs"
- description: Documentation for the Prompt Alignment Metric in Mastra, which evaluates how well LLM outputs adhere to given prompt instructions.
+ title: "Reference: Prompt Alignment Scorer | Evals | Mastra Docs"
+ description: Documentation for the Prompt Alignment Scorer in Mastra. Evaluates how well agent responses align with user prompt intent, requirements, completeness, and appropriateness using multi-dimensional analysis.
  ---

- # PromptAlignmentMetric
+ import PropertiesTable from "@site/src/components/PropertiesTable";

- :::info Scorers
- This documentation refers to the legacy evals API. For the latest scorer features, see [Scorers](/docs/scorers/overview).
- :::
+ # Prompt Alignment Scorer

- The `PromptAlignmentMetric` class evaluates how strictly an LLM's output follows a set of given prompt instructions. It uses a judge-based system to verify each instruction is followed exactly and provides detailed reasoning for any deviations.
+ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates how well agent responses align with user prompts across multiple dimensions: intent understanding, requirement fulfillment, response completeness, and format appropriateness.

- ## Basic Usage
-
- ```typescript
- import { openai } from "@ai-sdk/openai";
- import { PromptAlignmentMetric } from "@mastra/evals/llm";
-
- // Configure the model for evaluation
- const model = openai("gpt-4o-mini");
-
- const instructions = [
-   "Start sentences with capital letters",
-   "End each sentence with a period",
-   "Use present tense",
- ];
-
- const metric = new PromptAlignmentMetric(model, {
-   instructions,
-   scale: 1,
- });
-
- const result = await metric.measure(
-   "describe the weather",
-   "The sun is shining. Clouds float in the sky. A gentle breeze blows.",
- );
-
- console.log(result.score); // Alignment score from 0-1
- console.log(result.info.reason); // Explanation of the score
- ```
-
- ## Constructor Parameters
+ ## Parameters

  <PropertiesTable
    content={[
      {
        name: "model",
-       type: "LanguageModel",
+       type: "MastraModelConfig",
        description:
-         "Configuration for the model used to evaluate instruction alignment",
-       isOptional: false,
+         "The language model to use for evaluating prompt-response alignment",
+       required: true,
      },
      {
        name: "options",
        type: "PromptAlignmentOptions",
-       description: "Configuration options for the metric",
-       isOptional: false,
+       description: "Configuration options for the scorer",
+       required: false,
+       children: [
+         {
+           name: "scale",
+           type: "number",
+           description: "Scale factor to multiply the final score (default: 1)",
+           required: false,
+         },
+         {
+           name: "evaluationMode",
+           type: "'user' | 'system' | 'both'",
+           description:
+             "Evaluation mode - 'user' evaluates user prompt alignment only, 'system' evaluates system compliance only, 'both' evaluates both with weighted scoring (default: 'both')",
+           required: false,
+         },
+       ],
      },
    ]}
  />

- ### PromptAlignmentOptions
+ ## .run() Returns

  <PropertiesTable
    content={[
      {
-       name: "instructions",
-       type: "string[]",
-       description: "Array of instructions that the output should follow",
-       isOptional: false,
+       name: "score",
+       type: "number",
+       description:
+         "Multi-dimensional alignment score between 0 and scale (default 0-1)",
      },
      {
-       name: "scale",
-       type: "number",
-       description: "Maximum score value",
-       isOptional: true,
-       defaultValue: "1",
+       name: "reason",
+       type: "string",
+       description:
+         "Human-readable explanation of the prompt alignment evaluation with detailed breakdown",
      },
    ]}
  />

- ## measure() Parameters
+ `.run()` returns a result in the following shape:

- <PropertiesTable
-   content={[
-     {
-       name: "input",
-       type: "string",
-       description: "The original prompt or query",
-       isOptional: false,
+ ```typescript
+ {
+   runId: string,
+   score: number,
+   reason: string,
+   analyzeStepResult: {
+     intentAlignment: {
+       score: number,
+       primaryIntent: string,
+       isAddressed: boolean,
+       reasoning: string
      },
-     {
-       name: "output",
-       type: "string",
-       description: "The LLM's response to evaluate",
-       isOptional: false,
+     requirementsFulfillment: {
+       requirements: Array<{
+         requirement: string,
+         isFulfilled: boolean,
+         reasoning: string
+       }>,
+       overallScore: number
      },
-   ]}
- />
+     completeness: {
+       score: number,
+       missingElements: string[],
+       reasoning: string
+     },
+     responseAppropriateness: {
+       score: number,
+       formatAlignment: boolean,
+       toneAlignment: boolean,
+       reasoning: string
+     },
+     overallAssessment: string
+   }
+ }
+ ```
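As a quick orientation to this shape, the fields can be read directly off the result. A minimal sketch, assuming `result` holds the value returned by `scorer.run(...)`; the field names follow the documented shape above rather than a verified API:

```typescript
// Sketch: reading the documented .run() result shape (assumes `result` exists)
console.log(result.score); // weighted score, 0 to `scale`
console.log(result.reason); // human-readable explanation

// Per-dimension breakdown from the analyze step
const { intentAlignment, requirementsFulfillment, completeness } =
  result.analyzeStepResult;
console.log(intentAlignment.primaryIntent, intentAlignment.score);
for (const req of requirementsFulfillment.requirements) {
  console.log(`${req.requirement}: ${req.isFulfilled ? "fulfilled" : "missing"}`);
}
console.log("Missing elements:", completeness.missingElements);
```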

- ## Returns
+ ## Scoring Details

- <PropertiesTable
-   content={[
+ ### Scorer configuration
+
+ You can customize the Prompt Alignment Scorer by adjusting the scale parameter and evaluation mode to fit your scoring needs.
+
+ ```typescript showLineNumbers copy
+ const scorer = createPromptAlignmentScorerLLM({
+   model: "openai/gpt-4o-mini",
+   options: {
+     scale: 10, // Score from 0-10 instead of 0-1
+     evaluationMode: "both", // 'user', 'system', or 'both' (default)
+   },
+ });
+ ```
+
+ ### Multi-Dimensional Analysis
+
+ Prompt Alignment evaluates responses across four key dimensions with weighted scoring that adapts based on the evaluation mode:
+
+ #### User Mode ('user')
+
+ Evaluates alignment with user prompts only:
+
+ 1. **Intent Alignment** (40% weight) - Whether the response addresses the user's core request
+ 2. **Requirements Fulfillment** (30% weight) - If all user requirements are met
+ 3. **Completeness** (20% weight) - Whether the response is comprehensive for user needs
+ 4. **Response Appropriateness** (10% weight) - If format and tone match user expectations
+
+ #### System Mode ('system')
+
+ Evaluates compliance with system guidelines only:
+
+ 1. **Intent Alignment** (35% weight) - Whether the response follows system behavioral guidelines
+ 2. **Requirements Fulfillment** (35% weight) - If all system constraints are respected
+ 3. **Completeness** (15% weight) - Whether the response adheres to all system rules
+ 4. **Response Appropriateness** (15% weight) - If format and tone match system specifications
+
+ #### Both Mode ('both' - default)
+
+ Combines evaluation of both user and system alignment:
+
+ - **User alignment**: 70% of final score (using user mode weights)
+ - **System compliance**: 30% of final score (using system mode weights)
+ - Provides balanced assessment of user satisfaction and system adherence
+
+ ### Scoring Formula
+
+ **User Mode:**
+
+ ```
+ Weighted Score = (intent_score × 0.4) + (requirements_score × 0.3) +
+                  (completeness_score × 0.2) + (appropriateness_score × 0.1)
+ Final Score = Weighted Score × scale
+ ```
+
+ **System Mode:**
+
+ ```
+ Weighted Score = (intent_score × 0.35) + (requirements_score × 0.35) +
+                  (completeness_score × 0.15) + (appropriateness_score × 0.15)
+ Final Score = Weighted Score × scale
+ ```
+
+ **Both Mode (default):**
+
+ ```
+ User Score = (user dimensions with user weights)
+ System Score = (system dimensions with system weights)
+ Weighted Score = (User Score × 0.7) + (System Score × 0.3)
+ Final Score = Weighted Score × scale
+ ```
+
+ **Weight Distribution Rationale**:
+
+ - **User Mode**: Prioritizes intent (40%) and requirements (30%) for user satisfaction
+ - **System Mode**: Balances behavioral compliance (35%) and constraints (35%) equally
+ - **Both Mode**: 70/30 split ensures user needs are primary while maintaining system compliance
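The weighted combination described above is simple enough to sanity-check by hand. A minimal sketch of the arithmetic, using plain numbers in the 0-1 range; this mirrors the documented weights, not the library's internal implementation:

```typescript
// Per-dimension scores, each normalized to 0-1
type DimensionScores = {
  intent: number;
  requirements: number;
  completeness: number;
  appropriateness: number;
};

const USER_WEIGHTS: DimensionScores = {
  intent: 0.4,
  requirements: 0.3,
  completeness: 0.2,
  appropriateness: 0.1,
};

const SYSTEM_WEIGHTS: DimensionScores = {
  intent: 0.35,
  requirements: 0.35,
  completeness: 0.15,
  appropriateness: 0.15,
};

// Weighted sum of one mode's dimension scores
function weighted(scores: DimensionScores, weights: DimensionScores): number {
  return (
    scores.intent * weights.intent +
    scores.requirements * weights.requirements +
    scores.completeness * weights.completeness +
    scores.appropriateness * weights.appropriateness
  );
}

// 'both' mode: 70% user alignment + 30% system compliance, then scaled
function finalScoreBoth(
  user: DimensionScores,
  system: DimensionScores,
  scale = 1,
): number {
  return (
    (weighted(user, USER_WEIGHTS) * 0.7 +
      weighted(system, SYSTEM_WEIGHTS) * 0.3) *
    scale
  );
}
```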
+
+ ### Score Interpretation
+
+ - **0.9-1.0** = Excellent alignment across all dimensions
+ - **0.8-0.9** = Very good alignment with minor gaps
+ - **0.7-0.8** = Good alignment but missing some requirements or completeness
+ - **0.6-0.7** = Moderate alignment with noticeable gaps
+ - **0.4-0.6** = Poor alignment with significant issues
+ - **0.0-0.4** = Very poor alignment, response doesn't address the prompt effectively
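Where a numeric score needs a qualitative label (dashboards, CI summaries), the bands above translate directly into a small helper. A hypothetical sketch, assuming a score already normalized to 0-1:

```typescript
// Hypothetical helper mapping a normalized (0-1) score to the bands above
function interpretAlignment(score: number): string {
  if (score >= 0.9) return "excellent";
  if (score >= 0.8) return "very good";
  if (score >= 0.7) return "good";
  if (score >= 0.6) return "moderate";
  if (score >= 0.4) return "poor";
  return "very poor";
}
```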
+
+ ### When to Use Each Mode
+
+ **User Mode (`'user'`)** - Use when:
+
+ - Evaluating customer service responses for user satisfaction
+ - Testing content generation quality from user perspective
+ - Measuring how well responses address user questions
+ - Focusing purely on request fulfillment without system constraints
+
+ **System Mode (`'system'`)** - Use when:
+
+ - Auditing AI safety and compliance with behavioral guidelines
+ - Ensuring agents follow brand voice and tone requirements
+ - Validating adherence to content policies and constraints
+ - Testing system-level behavioral consistency
+
+ **Both Mode (`'both'`)** - Use when (default, recommended):
+
+ - Comprehensive evaluation of overall AI agent performance
+ - Balancing user satisfaction with system compliance
+ - Production monitoring where both user and system requirements matter
+ - Holistic assessment of prompt-response alignment
+
+ ## Common Use Cases
+
+ ### Code Generation Evaluation
+
+ Ideal for evaluating:
+
+ - Programming task completion
+ - Code quality and completeness
+ - Adherence to coding requirements
+ - Format specifications (functions, classes, etc.)
+
+ ```typescript
+ // Example: API endpoint creation
+ const codePrompt =
+   "Create a REST API endpoint with authentication and rate limiting";
+ // Scorer evaluates: intent (API creation), requirements (auth + rate limiting),
+ // completeness (full implementation), format (code structure)
+ ```
+
+ ### Instruction Following Assessment
+
+ Perfect for:
+
+ - Task completion verification
+ - Multi-step instruction adherence
+ - Requirement compliance checking
+ - Educational content evaluation
+
+ ```typescript
+ // Example: Multi-requirement task
+ const taskPrompt =
+   "Write a Python class with initialization, validation, error handling, and documentation";
+ // Scorer tracks each requirement individually and provides detailed breakdown
+ ```
+
+ ### Content Format Validation
+
+ Useful for:
+
+ - Format specification compliance
+ - Style guide adherence
+ - Output structure verification
+ - Response appropriateness checking
+
+ ```typescript
+ // Example: Structured output
+ const formatPrompt =
+   "Explain the differences between let and const in JavaScript using bullet points";
+ // Scorer evaluates content accuracy AND format compliance
+ ```
+
+ ### Agent Response Quality
+
+ Measure how well your AI agents follow user instructions:
+
+ ```typescript
+ const agent = new Agent({
+   name: "CodingAssistant",
+   instructions:
+     "You are a helpful coding assistant. Always provide working code examples.",
+   model: "openai/gpt-4o",
+ });
+
+ // Evaluate comprehensive alignment (default)
+ const scorer = createPromptAlignmentScorerLLM({
+   model: "openai/gpt-4o-mini",
+   options: { evaluationMode: "both" }, // Evaluates both user intent and system guidelines
+ });
+
+ // Evaluate just user satisfaction
+ const userScorer = createPromptAlignmentScorerLLM({
+   model: "openai/gpt-4o-mini",
+   options: { evaluationMode: "user" }, // Focus only on user request fulfillment
+ });
+
+ // Evaluate system compliance
+ const systemScorer = createPromptAlignmentScorerLLM({
+   model: "openai/gpt-4o-mini",
+   options: { evaluationMode: "system" }, // Check adherence to system instructions
+ });
+
+ const result = await scorer.run(agentRun);
+ ```
+
+ ### Prompt Engineering Optimization
+
+ Test different prompts to improve alignment:
+
+ ```typescript
+ const prompts = [
+   "Write a function to calculate factorial",
+   "Create a Python function that calculates factorial with error handling for negative inputs",
+   "Implement a factorial calculator in Python with: input validation, error handling, and docstring",
+ ];
+
+ // Compare alignment scores to find the best prompt
+ for (const prompt of prompts) {
+   const result = await scorer.run(createTestRun(prompt, response));
+   console.log(`Prompt alignment: ${result.score}`);
+ }
+ ```
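Note that `createTestRun` in the loop above is not defined on this page. One plausible shape, matching the `input`/`output` payload used by the `.run()` examples below (a hypothetical helper, not part of `@mastra/evals`):

```typescript
// Hypothetical helper assembling a run payload from a prompt/response pair
function createTestRun(prompt: string, responseText: string) {
  return {
    input: [{ role: "user", content: prompt }],
    output: { role: "assistant", text: responseText },
  };
}
```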
316
+
317
+ ### Multi-Agent System Evaluation
318
+
319
+ Compare different agents or models:
320
+
321
+ ```typescript
322
+ const agents = [agent1, agent2, agent3];
323
+ const testPrompts = [...]; // Array of test prompts
324
+
325
+ for (const agent of agents) {
326
+ let totalScore = 0;
327
+ for (const prompt of testPrompts) {
328
+ const response = await agent.run(prompt);
329
+ const evaluation = await scorer.run({ input: prompt, output: response });
330
+ totalScore += evaluation.score;
331
+ }
332
+ console.log(`${agent.name} average alignment: ${totalScore / testPrompts.length}`);
333
+ }
334
+ ```
335
+
336
+ ## Examples
337
+
338
+ ### Basic Configuration
339
+
340
+ ```typescript
341
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals";
342
+
343
+ const scorer = createPromptAlignmentScorerLLM({
344
+ model: "openai/gpt-4o",
345
+ });
346
+
347
+ // Evaluate a code generation task
348
+ const result = await scorer.run({
349
+ input: [
106
350
  {
107
- name: "score",
108
- type: "number",
109
- description: "Alignment score (0 to scale, default 0-1)",
351
+ role: "user",
352
+ content:
353
+ "Write a Python function to calculate factorial with error handling",
110
354
  },
355
+ ],
356
+ output: {
357
+ role: "assistant",
358
+ text: `def factorial(n):
359
+ if n < 0:
360
+ raise ValueError("Factorial not defined for negative numbers")
361
+ if n == 0:
362
+ return 1
363
+ return n * factorial(n-1)`,
364
+ },
365
+ });
366
+ // Result: { score: 0.95, reason: "Excellent alignment - function addresses intent, includes error handling..." }
367
+ ```
368
+
369
+ ### Custom Configuration Examples
370
+
371
+ ```typescript
372
+ // Configure scale and evaluation mode
373
+ const scorer = createPromptAlignmentScorerLLM({
374
+ model: "openai/gpt-4o",
375
+ options: {
376
+ scale: 10, // Score from 0-10 instead of 0-1
377
+ evaluationMode: "both", // 'user', 'system', or 'both' (default)
378
+ },
379
+ });
380
+
381
+ // User-only evaluation - focus on user satisfaction
382
+ const userScorer = createPromptAlignmentScorerLLM({
383
+ model: "openai/gpt-4o",
384
+ options: { evaluationMode: "user" },
385
+ });
386
+
387
+ // System-only evaluation - focus on compliance
388
+ const systemScorer = createPromptAlignmentScorerLLM({
389
+ model: "openai/gpt-4o",
390
+ options: { evaluationMode: "system" },
391
+ });
392
+
393
+ const result = await scorer.run(testRun);
394
+ // Result: { score: 8.5, reason: "Score: 8.5 out of 10 - Good alignment with both user intent and system guidelines..." }
395
+ ```
396
+
397
+ ### Format-Specific Evaluation
398
+
399
+ ```typescript
400
+ // Evaluate bullet point formatting
401
+ const result = await scorer.run({
402
+ input: [
111
403
  {
112
- name: "info",
113
- type: "object",
114
- description:
115
- "Object containing detailed metrics about instruction compliance",
116
- properties: [
117
- {
118
- type: "string",
119
- parameters: [
120
- {
121
- name: "reason",
122
- type: "string",
123
- description:
124
- "Detailed explanation of the score and instruction compliance",
125
- },
126
- ],
127
- },
128
- ],
404
+ role: "user",
405
+ content: "List the benefits of TypeScript in bullet points",
129
406
  },
130
- ]}
131
- />
407
+ ],
408
+ output: {
409
+ role: "assistant",
410
+ text: "TypeScript provides static typing, better IDE support, and enhanced code reliability.",
411
+ },
412
+ });
413
+ // Result: Lower appropriateness score due to format mismatch (paragraph vs bullet points)
414
+ ```
132
415
 
133
- ## Scoring Details
416
+ ### Excellent alignment example
417
+
418
+ In this example, the response fully addresses the user's prompt with all requirements met.
134
419
 
135
- The metric evaluates instruction alignment through:
420
+ ```typescript title="src/example-excellent-prompt-alignment.ts" showLineNumbers copy
421
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
136
422
 
137
- - Applicability assessment for each instruction
138
- - Strict compliance evaluation for applicable instructions
139
- - Detailed reasoning for all verdicts
140
- - Proportional scoring based on applicable instructions
423
+ const scorer = createPromptAlignmentScorerLLM({
424
+ model: "openai/gpt-4o-mini",
425
+ });
141
426
 
142
- ### Instruction Verdicts
427
+ const inputMessages = [
428
+ {
429
+ role: "user",
430
+ content:
431
+ "Write a Python function to calculate factorial with error handling for negative numbers",
432
+ },
433
+ ];
143
434
 
144
- Each instruction receives one of three verdicts:
435
+ const outputMessage = {
436
+ text: `def factorial(n):
437
+ """Calculate factorial of a number."""
438
+ if n < 0:
439
+ raise ValueError("Factorial not defined for negative numbers")
440
+ if n == 0 or n == 1:
441
+ return 1
442
+ return n * factorial(n - 1)`,
443
+ };
444
+
445
+ const result = await scorer.run({
446
+ input: inputMessages,
447
+ output: outputMessage,
448
+ });
145
449
 
146
- - "yes": Instruction is applicable and completely followed
147
- - "no": Instruction is applicable but not followed or only partially followed
148
- - "n/a": Instruction is not applicable to the given context
450
+ console.log(result);
451
+ ```
149
452
 
150
- ### Scoring Process
453
+ ### Excellent alignment output
151
454
 
152
- 1. Evaluates instruction applicability:
153
- - Determines if each instruction applies to the context
154
- - Marks irrelevant instructions as "n/a"
155
- - Considers domain-specific requirements
455
+ The output receives a high score because it perfectly addresses the intent, fulfills all requirements, and uses appropriate format.
156
456
 
157
- 2. Assesses compliance for applicable instructions:
158
- - Evaluates each applicable instruction independently
159
- - Requires complete compliance for "yes" verdict
160
- - Documents specific reasons for all verdicts
457
+ ```typescript
458
+ {
459
+ score: 0.95,
460
+ reason: 'The score is 0.95 because the response perfectly addresses the primary intent of creating a factorial function and fulfills all requirements including Python implementation, error handling for negative numbers, and proper documentation. The code format is appropriate and the implementation is complete.'
461
+ }
462
+ ```
161
463
 
162
- 3. Calculates alignment score:
163
- - Counts followed instructions ("yes" verdicts)
164
- - Divides by total applicable instructions (excluding "n/a")
165
- - Scales to configured range
464
+ ### Partial alignment example
166
465
 
167
- Final score: `(followed_instructions / applicable_instructions) * scale`
466
+ In this example, the response addresses the core intent but misses some requirements or has format issues.
168
467
 
169
- ### Important Considerations
468
+ ```typescript title="src/example-partial-prompt-alignment.ts" showLineNumbers copy
469
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
170
470
 
171
- - Empty outputs:
172
- - All formatting instructions are considered applicable
173
- - Marked as "no" since they cannot satisfy requirements
174
- - Domain-specific instructions:
175
- - Always applicable if about the queried domain
176
- - Marked as "no" if not followed, not "n/a"
177
- - "n/a" verdicts:
178
- - Only used for completely different domains
179
- - Do not affect the final score calculation
471
+ const scorer = createPromptAlignmentScorerLLM({
472
+ model: "openai/gpt-4o-mini",
473
+ });
180
474
 
181
- ### Score interpretation
475
+ const inputMessages = [
476
+ {
477
+ role: "user",
478
+ content: "List the benefits of TypeScript in bullet points",
479
+ },
480
+ ];
481
+
482
+ const outputMessage = {
483
+ text: "TypeScript provides static typing, better IDE support, and enhanced code reliability through compile-time error checking.",
484
+ };
485
+
486
+ const result = await scorer.run({
487
+ input: inputMessages,
488
+ output: outputMessage,
489
+ });
182
490
 
183
- (0 to scale, default 0-1)
491
+ console.log(result);
492
+ ```
184
493
 
185
- - 1.0: All applicable instructions followed perfectly
186
- - 0.7-0.9: Most applicable instructions followed
187
- - 0.4-0.6: Mixed compliance with applicable instructions
188
- - 0.1-0.3: Limited compliance with applicable instructions
189
- - 0.0: No applicable instructions followed
494
+ #### Partial alignment output
190
495
 
191
- ## Example with Analysis
496
+ The output receives a lower score because while the content is accurate, it doesn't follow the requested format (bullet points).
192
497
 
193
498
  ```typescript
194
- import { openai } from "@ai-sdk/openai";
195
- import { PromptAlignmentMetric } from "@mastra/evals/llm";
499
+ {
500
+ score: 0.75,
501
+ reason: 'The score is 0.75 because the response addresses the intent of explaining TypeScript benefits and provides accurate information, but fails to use the requested bullet point format, resulting in lower appropriateness scoring.'
502
+ }
503
+ ```
196
504
 
197
- // Configure the model for evaluation
198
- const model = openai("gpt-4o-mini");
505
+ ### Poor alignment example
199
506
 
200
- const metric = new PromptAlignmentMetric(model, {
201
- instructions: [
202
- "Use bullet points for each item",
203
- "Include exactly three examples",
204
- "End each point with a semicolon"
205
- ],
206
- scale: 1
207
- });
208
-
209
- const result = await metric.measure(
210
- "List three fruits",
211
- "• Apple is red and sweet;
212
- • Banana is yellow and curved;
213
- • Orange is citrus and round.`
214
- );
215
-
216
- // Example output:
217
- // {
218
- // score: 1.0,
219
- // info: {
220
- // reason: "The score is 1.0 because all instructions were followed exactly:
221
- // bullet points were used, exactly three examples were provided, and
222
- // each point ends with a semicolon."
223
- // }
224
- // }
225
-
226
- const result2 = await metric.measure(
227
- "List three fruits",
228
- "1. Apple
229
- 2. Banana
230
- 3. Orange and Grape`
231
- );
232
-
233
- // Example output:
234
- // {
235
- // score: 0.33,
236
- // info: {
237
- // reason: "The score is 0.33 because: numbered lists were used instead of bullet points,
238
- // no semicolons were used, and four fruits were listed instead of exactly three."
239
- // }
240
- // }
507
+ In this example, the response covers only the most basic part of the request and omits the user's specific requirements.
508
+
509
+ ```typescript title="src/example-poor-prompt-alignment.ts" showLineNumbers copy
510
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
511
+
512
+ const scorer = createPromptAlignmentScorerLLM({
513
+ model: "openai/gpt-4o-mini",
514
+ });
515
+
516
+ const inputMessages = [
517
+ {
518
+ role: "user",
519
+ content:
520
+ "Write a Python class with initialization, validation, error handling, and documentation",
521
+ },
522
+ ];
523
+
524
+ const outputMessage = {
525
+ text: `class Example:
526
+ def __init__(self, value):
527
+ self.value = value`,
528
+ };
529
+
530
+ const result = await scorer.run({
531
+ input: inputMessages,
532
+ output: outputMessage,
533
+ });
534
+
535
+ console.log(result);
536
+ ```
537
+
538
+ ### Poor alignment output
539
+
540
+ The output receives a low score because it only partially fulfills the requirements, missing validation, error handling, and documentation.
541
+
542
+ ```typescript
543
+ {
544
+ score: 0.35,
545
+ reason: 'The score is 0.35 because while the response addresses the basic intent of creating a Python class with initialization, it fails to include validation, error handling, and documentation as specifically requested, resulting in incomplete requirement fulfillment.'
546
+ }
547
+ ```
548
+
549
+ ### Evaluation Mode Examples
550
+
551
+ #### User Mode - Focus on User Prompt Only
552
+
553
+ Evaluates how well the response addresses the user's request, ignoring system instructions:
554
+
555
+ ```typescript title="src/example-user-mode.ts" showLineNumbers copy
556
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
+
+ const scorer = createPromptAlignmentScorerLLM({
557
+ model: "openai/gpt-4o-mini",
558
+ options: { evaluationMode: "user" },
559
+ });
560
+
561
+ const result = await scorer.run({
562
+ input: {
563
+ inputMessages: [
564
+ {
565
+ role: "user",
566
+ content: "Explain recursion with an example",
567
+ },
568
+ ],
569
+ systemMessages: [
570
+ {
571
+ role: "system",
572
+ content: "Always provide code examples in Python",
573
+ },
574
+ ],
575
+ },
576
+ output: {
577
+ text: "Recursion is when a function calls itself. For example: factorial(5) = 5 * factorial(4)",
578
+ },
579
+ });
580
+ // Scores high for addressing the user's request, even without Python code
581
+ ```
582
+
583
+ #### System Mode - Focus on System Guidelines Only
584
+
585
+ Evaluates compliance with system behavioral guidelines and constraints:
586
+
587
+ ```typescript title="src/example-system-mode.ts" showLineNumbers copy
588
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
+
+ const scorer = createPromptAlignmentScorerLLM({
589
+ model: "openai/gpt-4o-mini",
590
+ options: { evaluationMode: "system" },
591
+ });
592
+
593
+ const result = await scorer.run({
594
+ input: {
595
+ systemMessages: [
596
+ {
597
+ role: "system",
598
+ content:
599
+ "You are a helpful assistant. Always be polite, concise, and provide examples.",
600
+ },
601
+ ],
602
+ inputMessages: [
603
+ {
604
+ role: "user",
605
+ content: "What is machine learning?",
606
+ },
607
+ ],
608
+ },
609
+ output: {
610
+ text: "Machine learning is a subset of AI where computers learn from data. For example, spam filters learn to identify unwanted emails by analyzing patterns in previously marked spam.",
611
+ },
612
+ });
613
+ // Evaluates politeness, conciseness, and whether an example is provided
241
614
  ```
242
615
 
616
+ #### Both Mode - Combined Evaluation (Default)
617
+
618
+ Evaluates both user intent fulfillment and system compliance with weighted scoring (70% user, 30% system):
619
+
620
+ ```typescript title="src/example-both-mode.ts" showLineNumbers copy
621
+ import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/prebuilt";
+
+ const scorer = createPromptAlignmentScorerLLM({
622
+ model: "openai/gpt-4o-mini",
623
+ options: { evaluationMode: "both" }, // This is the default
624
+ });
625
+
626
+ const result = await scorer.run({
627
+ input: {
628
+ systemMessages: [
629
+ {
630
+ role: "system",
631
+ content:
632
+ "Always provide code examples when explaining programming concepts",
633
+ },
634
+ ],
635
+ inputMessages: [
636
+ {
637
+ role: "user",
638
+ content: "Explain how to reverse a string",
639
+ },
640
+ ],
641
+ },
642
+ output: {
643
+ text: `To reverse a string, you can iterate through it backwards. Here's an example in Python:
644
+
645
+ def reverse_string(s):
646
+ return s[::-1]
647
+
648
+ # Usage: reverse_string("hello") returns "olleh"`,
649
+ },
650
+ });
651
+ // High score for both addressing the user's request AND following system guidelines
652
+ ```
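+
+ The 70/30 split above implies the combined score is a weighted average of the two dimension scores. A sketch for illustration only; `userScore` and `systemScore` are assumed names, not the scorer's actual internals:
+
+ ```typescript
+ // Hypothetical illustration of the documented 70% user / 30% system weighting
+ const userScore = 0.9;   // how well the output addresses the user's request
+ const systemScore = 1.0; // how well it follows the system guidelines
+ const combined = 0.7 * userScore + 0.3 * systemScore; // => 0.93
+ ```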
653
+
654
+ ## Comparison with Other Scorers
655
+
656
+ | Aspect | Prompt Alignment | Answer Relevancy | Faithfulness |
657
+ | -------------- | ------------------------------------------ | ---------------------------- | -------------------------------- |
658
+ | **Focus** | Multi-dimensional prompt adherence | Query-response relevance | Context groundedness |
659
+ | **Evaluation** | Intent, requirements, completeness, format | Semantic similarity to query | Factual consistency with context |
660
+ | **Use Case** | General prompt following | Information retrieval | RAG/context-based systems |
661
+ | **Dimensions** | 4 weighted dimensions | Single relevance dimension | Single faithfulness dimension |
662
+
243
663
  ## Related
244
664
 
245
- - [Answer Relevancy Metric](./answer-relevancy)
246
- - [Keyword Coverage Metric](./keyword-coverage)
665
+ - [Answer Relevancy Scorer](/reference/v1/evals/answer-relevancy) - Evaluates query-response relevance
666
+ - [Faithfulness Scorer](/reference/v1/evals/faithfulness) - Measures context groundedness
667
+ - [Tool Call Accuracy Scorer](/reference/v1/evals/tool-call-accuracy) - Evaluates tool selection
668
+ - [Custom Scorers](/docs/v1/evals/custom-scorers) - Creating your own evaluation metrics