@mastra/mcp-docs-server 1.1.5 → 1.1.6-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/adding-voice.md +349 -0
- package/.docs/docs/agents/agent-approval.md +558 -0
- package/.docs/docs/agents/agent-memory.md +209 -0
- package/.docs/docs/agents/guardrails.md +374 -0
- package/.docs/docs/agents/network-approval.md +275 -0
- package/.docs/docs/agents/networks.md +299 -0
- package/.docs/docs/agents/overview.md +304 -0
- package/.docs/docs/agents/processors.md +622 -0
- package/.docs/docs/agents/structured-output.md +273 -0
- package/.docs/docs/agents/supervisor-agents.md +304 -0
- package/.docs/docs/agents/using-tools.md +214 -0
- package/.docs/docs/build-with-ai/mcp-docs-server.md +238 -0
- package/.docs/docs/build-with-ai/skills.md +35 -0
- package/.docs/docs/community/contributing-templates.md +3 -0
- package/.docs/docs/community/discord.md +9 -0
- package/.docs/docs/community/licensing.md +66 -0
- package/.docs/docs/deployment/cloud-providers.md +15 -0
- package/.docs/docs/deployment/mastra-server.md +122 -0
- package/.docs/docs/deployment/monorepo.md +142 -0
- package/.docs/docs/deployment/overview.md +62 -0
- package/.docs/docs/deployment/studio.md +239 -0
- package/.docs/docs/deployment/web-framework.md +52 -0
- package/.docs/docs/deployment/workflow-runners.md +9 -0
- package/.docs/docs/evals/built-in-scorers.md +47 -0
- package/.docs/docs/evals/custom-scorers.md +519 -0
- package/.docs/docs/evals/overview.md +141 -0
- package/.docs/docs/evals/running-in-ci.md +124 -0
- package/.docs/docs/getting-started/build-with-ai.md +68 -0
- package/.docs/docs/getting-started/manual-install.md +226 -0
- package/.docs/docs/getting-started/project-structure.md +60 -0
- package/.docs/docs/getting-started/start.md +28 -0
- package/.docs/docs/getting-started/studio.md +101 -0
- package/.docs/docs/index.md +43 -0
- package/.docs/docs/mastra-cloud/deployment.md +77 -0
- package/.docs/docs/mastra-cloud/observability.md +38 -0
- package/.docs/docs/mastra-cloud/overview.md +23 -0
- package/.docs/docs/mastra-cloud/setup.md +42 -0
- package/.docs/docs/mastra-cloud/studio.md +24 -0
- package/.docs/docs/mastra-code/configuration.md +299 -0
- package/.docs/docs/mastra-code/customization.md +228 -0
- package/.docs/docs/mastra-code/modes.md +104 -0
- package/.docs/docs/mastra-code/overview.md +135 -0
- package/.docs/docs/mastra-code/tools.md +229 -0
- package/.docs/docs/mcp/overview.md +373 -0
- package/.docs/docs/mcp/publishing-mcp-server.md +95 -0
- package/.docs/docs/memory/memory-processors.md +314 -0
- package/.docs/docs/memory/message-history.md +260 -0
- package/.docs/docs/memory/observational-memory.md +248 -0
- package/.docs/docs/memory/overview.md +45 -0
- package/.docs/docs/memory/semantic-recall.md +272 -0
- package/.docs/docs/memory/storage.md +261 -0
- package/.docs/docs/memory/working-memory.md +400 -0
- package/.docs/docs/observability/datasets/overview.md +198 -0
- package/.docs/docs/observability/datasets/running-experiments.md +274 -0
- package/.docs/docs/observability/logging.md +99 -0
- package/.docs/docs/observability/overview.md +70 -0
- package/.docs/docs/observability/tracing/bridges/otel.md +209 -0
- package/.docs/docs/observability/tracing/exporters/arize.md +272 -0
- package/.docs/docs/observability/tracing/exporters/braintrust.md +111 -0
- package/.docs/docs/observability/tracing/exporters/cloud.md +127 -0
- package/.docs/docs/observability/tracing/exporters/datadog.md +187 -0
- package/.docs/docs/observability/tracing/exporters/default.md +209 -0
- package/.docs/docs/observability/tracing/exporters/laminar.md +100 -0
- package/.docs/docs/observability/tracing/exporters/langfuse.md +213 -0
- package/.docs/docs/observability/tracing/exporters/langsmith.md +198 -0
- package/.docs/docs/observability/tracing/exporters/otel.md +476 -0
- package/.docs/docs/observability/tracing/exporters/posthog.md +148 -0
- package/.docs/docs/observability/tracing/exporters/sentry.md +208 -0
- package/.docs/docs/observability/tracing/overview.md +1112 -0
- package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +300 -0
- package/.docs/docs/rag/chunking-and-embedding.md +183 -0
- package/.docs/docs/rag/graph-rag.md +215 -0
- package/.docs/docs/rag/overview.md +72 -0
- package/.docs/docs/rag/retrieval.md +515 -0
- package/.docs/docs/rag/vector-databases.md +645 -0
- package/.docs/docs/server/auth/auth0.md +220 -0
- package/.docs/docs/server/auth/better-auth.md +203 -0
- package/.docs/docs/server/auth/clerk.md +132 -0
- package/.docs/docs/server/auth/composite-auth.md +234 -0
- package/.docs/docs/server/auth/custom-auth-provider.md +513 -0
- package/.docs/docs/server/auth/firebase.md +272 -0
- package/.docs/docs/server/auth/jwt.md +110 -0
- package/.docs/docs/server/auth/simple-auth.md +180 -0
- package/.docs/docs/server/auth/supabase.md +117 -0
- package/.docs/docs/server/auth/workos.md +186 -0
- package/.docs/docs/server/auth.md +38 -0
- package/.docs/docs/server/custom-adapters.md +378 -0
- package/.docs/docs/server/custom-api-routes.md +267 -0
- package/.docs/docs/server/mastra-client.md +243 -0
- package/.docs/docs/server/mastra-server.md +71 -0
- package/.docs/docs/server/middleware.md +225 -0
- package/.docs/docs/server/request-context.md +471 -0
- package/.docs/docs/server/server-adapters.md +547 -0
- package/.docs/docs/streaming/events.md +237 -0
- package/.docs/docs/streaming/overview.md +175 -0
- package/.docs/docs/streaming/tool-streaming.md +175 -0
- package/.docs/docs/streaming/workflow-streaming.md +109 -0
- package/.docs/docs/voice/overview.md +959 -0
- package/.docs/docs/voice/speech-to-speech.md +102 -0
- package/.docs/docs/voice/speech-to-text.md +79 -0
- package/.docs/docs/voice/text-to-speech.md +83 -0
- package/.docs/docs/workflows/agents-and-tools.md +166 -0
- package/.docs/docs/workflows/control-flow.md +822 -0
- package/.docs/docs/workflows/error-handling.md +360 -0
- package/.docs/docs/workflows/human-in-the-loop.md +215 -0
- package/.docs/docs/workflows/overview.md +370 -0
- package/.docs/docs/workflows/snapshots.md +238 -0
- package/.docs/docs/workflows/suspend-and-resume.md +205 -0
- package/.docs/docs/workflows/time-travel.md +309 -0
- package/.docs/docs/workflows/workflow-state.md +181 -0
- package/.docs/docs/workspace/filesystem.md +164 -0
- package/.docs/docs/workspace/overview.md +239 -0
- package/.docs/docs/workspace/sandbox.md +63 -0
- package/.docs/docs/workspace/search.md +243 -0
- package/.docs/docs/workspace/skills.md +169 -0
- package/.docs/guides/agent-frameworks/ai-sdk.md +140 -0
- package/.docs/guides/build-your-ui/ai-sdk-ui.md +1499 -0
- package/.docs/guides/build-your-ui/assistant-ui.md +156 -0
- package/.docs/guides/build-your-ui/copilotkit.md +289 -0
- package/.docs/guides/deployment/amazon-ec2.md +130 -0
- package/.docs/guides/deployment/aws-lambda.md +248 -0
- package/.docs/guides/deployment/azure-app-services.md +114 -0
- package/.docs/guides/deployment/cloudflare.md +99 -0
- package/.docs/guides/deployment/digital-ocean.md +168 -0
- package/.docs/guides/deployment/inngest.md +682 -0
- package/.docs/guides/deployment/netlify.md +77 -0
- package/.docs/guides/deployment/vercel.md +101 -0
- package/.docs/guides/getting-started/astro.md +398 -0
- package/.docs/guides/getting-started/electron.md +504 -0
- package/.docs/guides/getting-started/express.md +251 -0
- package/.docs/guides/getting-started/hono.md +190 -0
- package/.docs/guides/getting-started/next-js.md +347 -0
- package/.docs/guides/getting-started/nuxt.md +497 -0
- package/.docs/guides/getting-started/quickstart.md +67 -0
- package/.docs/guides/getting-started/sveltekit.md +296 -0
- package/.docs/guides/getting-started/vite-react.md +425 -0
- package/.docs/guides/guide/ai-recruiter.md +226 -0
- package/.docs/guides/guide/chef-michel.md +211 -0
- package/.docs/guides/guide/code-review-bot.md +226 -0
- package/.docs/guides/guide/dev-assistant.md +307 -0
- package/.docs/guides/guide/docs-manager.md +238 -0
- package/.docs/guides/guide/github-actions-pr-description.md +236 -0
- package/.docs/guides/guide/notes-mcp-server.md +416 -0
- package/.docs/guides/guide/research-assistant.md +348 -0
- package/.docs/guides/guide/research-coordinator.md +416 -0
- package/.docs/guides/guide/stock-agent.md +132 -0
- package/.docs/guides/guide/web-search.md +320 -0
- package/.docs/guides/guide/whatsapp-chat-bot.md +405 -0
- package/.docs/guides/index.md +3 -0
- package/.docs/guides/migrations/agentnetwork.md +97 -0
- package/.docs/guides/migrations/ai-sdk-v4-to-v5.md +112 -0
- package/.docs/guides/migrations/network-to-supervisor.md +261 -0
- package/.docs/guides/migrations/upgrade-to-v1/agent.md +404 -0
- package/.docs/guides/migrations/upgrade-to-v1/cli.md +57 -0
- package/.docs/guides/migrations/upgrade-to-v1/client.md +337 -0
- package/.docs/guides/migrations/upgrade-to-v1/deployment.md +37 -0
- package/.docs/guides/migrations/upgrade-to-v1/evals.md +239 -0
- package/.docs/guides/migrations/upgrade-to-v1/mastra.md +143 -0
- package/.docs/guides/migrations/upgrade-to-v1/mcp.md +97 -0
- package/.docs/guides/migrations/upgrade-to-v1/memory.md +285 -0
- package/.docs/guides/migrations/upgrade-to-v1/overview.md +119 -0
- package/.docs/guides/migrations/upgrade-to-v1/processors.md +68 -0
- package/.docs/guides/migrations/upgrade-to-v1/rag.md +42 -0
- package/.docs/guides/migrations/upgrade-to-v1/storage.md +553 -0
- package/.docs/guides/migrations/upgrade-to-v1/tools.md +180 -0
- package/.docs/guides/migrations/upgrade-to-v1/tracing.md +412 -0
- package/.docs/guides/migrations/upgrade-to-v1/vectors.md +87 -0
- package/.docs/guides/migrations/upgrade-to-v1/voice.md +30 -0
- package/.docs/guides/migrations/upgrade-to-v1/workflows.md +341 -0
- package/.docs/guides/migrations/vnext-to-standard-apis.md +362 -0
- package/.docs/models/embeddings.md +161 -0
- package/.docs/models/gateways/azure-openai.md +128 -0
- package/.docs/models/gateways/custom-gateways.md +545 -0
- package/.docs/models/gateways/netlify.md +88 -0
- package/.docs/models/gateways/openrouter.md +219 -0
- package/.docs/models/gateways/vercel.md +225 -0
- package/.docs/models/gateways.md +14 -0
- package/.docs/models/index.md +286 -0
- package/.docs/models/providers/302ai.md +134 -0
- package/.docs/models/providers/abacus.md +125 -0
- package/.docs/models/providers/agentrouter.md +90 -0
- package/.docs/models/providers/aihubmix.md +107 -0
- package/.docs/models/providers/alibaba-cn.md +135 -0
- package/.docs/models/providers/alibaba.md +111 -0
- package/.docs/models/providers/amazon-bedrock.md +33 -0
- package/.docs/models/providers/anthropic.md +153 -0
- package/.docs/models/providers/azure.md +33 -0
- package/.docs/models/providers/bailing.md +72 -0
- package/.docs/models/providers/baseten.md +77 -0
- package/.docs/models/providers/berget.md +78 -0
- package/.docs/models/providers/cerebras.md +101 -0
- package/.docs/models/providers/chutes.md +136 -0
- package/.docs/models/providers/cloudflare-ai-gateway.md +33 -0
- package/.docs/models/providers/cloudflare-workers-ai.md +109 -0
- package/.docs/models/providers/cohere.md +33 -0
- package/.docs/models/providers/cortecs.md +91 -0
- package/.docs/models/providers/deepinfra.md +112 -0
- package/.docs/models/providers/deepseek.md +88 -0
- package/.docs/models/providers/fastrouter.md +84 -0
- package/.docs/models/providers/fireworks-ai.md +89 -0
- package/.docs/models/providers/firmware.md +85 -0
- package/.docs/models/providers/friendli.md +78 -0
- package/.docs/models/providers/github-models.md +125 -0
- package/.docs/models/providers/google-vertex.md +33 -0
- package/.docs/models/providers/google.md +159 -0
- package/.docs/models/providers/groq.md +107 -0
- package/.docs/models/providers/helicone.md +161 -0
- package/.docs/models/providers/huggingface.md +90 -0
- package/.docs/models/providers/iflowcn.md +84 -0
- package/.docs/models/providers/inception.md +72 -0
- package/.docs/models/providers/inference.md +79 -0
- package/.docs/models/providers/io-intelligence.md +87 -0
- package/.docs/models/providers/io-net.md +87 -0
- package/.docs/models/providers/jiekou.md +131 -0
- package/.docs/models/providers/kilo.md +333 -0
- package/.docs/models/providers/kimi-for-coding.md +100 -0
- package/.docs/models/providers/kuae-cloud-coding-plan.md +71 -0
- package/.docs/models/providers/llama.md +77 -0
- package/.docs/models/providers/lmstudio.md +73 -0
- package/.docs/models/providers/lucidquery.md +72 -0
- package/.docs/models/providers/minimax-cn-coding-plan.md +102 -0
- package/.docs/models/providers/minimax-cn.md +102 -0
- package/.docs/models/providers/minimax-coding-plan.md +102 -0
- package/.docs/models/providers/minimax.md +104 -0
- package/.docs/models/providers/mistral.md +124 -0
- package/.docs/models/providers/moark.md +72 -0
- package/.docs/models/providers/modelscope.md +77 -0
- package/.docs/models/providers/moonshotai-cn.md +76 -0
- package/.docs/models/providers/moonshotai.md +76 -0
- package/.docs/models/providers/morph.md +73 -0
- package/.docs/models/providers/nano-gpt.md +103 -0
- package/.docs/models/providers/nebius.md +116 -0
- package/.docs/models/providers/nova.md +72 -0
- package/.docs/models/providers/novita-ai.md +154 -0
- package/.docs/models/providers/nvidia.md +141 -0
- package/.docs/models/providers/ollama-cloud.md +103 -0
- package/.docs/models/providers/ollama.md +33 -0
- package/.docs/models/providers/openai.md +193 -0
- package/.docs/models/providers/opencode.md +100 -0
- package/.docs/models/providers/ovhcloud.md +83 -0
- package/.docs/models/providers/perplexity.md +100 -0
- package/.docs/models/providers/poe.md +183 -0
- package/.docs/models/providers/privatemode-ai.md +75 -0
- package/.docs/models/providers/requesty.md +90 -0
- package/.docs/models/providers/scaleway.md +84 -0
- package/.docs/models/providers/siliconflow-cn.md +138 -0
- package/.docs/models/providers/siliconflow.md +140 -0
- package/.docs/models/providers/stackit.md +78 -0
- package/.docs/models/providers/stepfun.md +73 -0
- package/.docs/models/providers/submodel.md +79 -0
- package/.docs/models/providers/synthetic.md +96 -0
- package/.docs/models/providers/togetherai.md +115 -0
- package/.docs/models/providers/upstage.md +73 -0
- package/.docs/models/providers/venice.md +95 -0
- package/.docs/models/providers/vivgrid.md +106 -0
- package/.docs/models/providers/vultr.md +75 -0
- package/.docs/models/providers/wandb.md +80 -0
- package/.docs/models/providers/xai.md +141 -0
- package/.docs/models/providers/xiaomi.md +71 -0
- package/.docs/models/providers/zai-coding-plan.md +80 -0
- package/.docs/models/providers/zai.md +79 -0
- package/.docs/models/providers/zenmux.md +161 -0
- package/.docs/models/providers/zhipuai-coding-plan.md +79 -0
- package/.docs/models/providers/zhipuai.md +79 -0
- package/.docs/models/providers.md +81 -0
- package/.docs/reference/agents/agent.md +141 -0
- package/.docs/reference/agents/generate.md +186 -0
- package/.docs/reference/agents/generateLegacy.md +173 -0
- package/.docs/reference/agents/getDefaultGenerateOptions.md +36 -0
- package/.docs/reference/agents/getDefaultOptions.md +34 -0
- package/.docs/reference/agents/getDefaultStreamOptions.md +36 -0
- package/.docs/reference/agents/getDescription.md +21 -0
- package/.docs/reference/agents/getInstructions.md +34 -0
- package/.docs/reference/agents/getLLM.md +37 -0
- package/.docs/reference/agents/getMemory.md +34 -0
- package/.docs/reference/agents/getModel.md +34 -0
- package/.docs/reference/agents/getTools.md +29 -0
- package/.docs/reference/agents/getVoice.md +34 -0
- package/.docs/reference/agents/listAgents.md +35 -0
- package/.docs/reference/agents/listScorers.md +34 -0
- package/.docs/reference/agents/listTools.md +34 -0
- package/.docs/reference/agents/listWorkflows.md +34 -0
- package/.docs/reference/agents/network.md +133 -0
- package/.docs/reference/ai-sdk/chat-route.md +82 -0
- package/.docs/reference/ai-sdk/handle-chat-stream.md +53 -0
- package/.docs/reference/ai-sdk/handle-network-stream.md +37 -0
- package/.docs/reference/ai-sdk/handle-workflow-stream.md +55 -0
- package/.docs/reference/ai-sdk/network-route.md +74 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +231 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-v4-messages.md +79 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-v5-messages.md +76 -0
- package/.docs/reference/ai-sdk/with-mastra.md +59 -0
- package/.docs/reference/ai-sdk/workflow-route.md +79 -0
- package/.docs/reference/auth/auth0.md +73 -0
- package/.docs/reference/auth/better-auth.md +71 -0
- package/.docs/reference/auth/clerk.md +36 -0
- package/.docs/reference/auth/firebase.md +80 -0
- package/.docs/reference/auth/jwt.md +26 -0
- package/.docs/reference/auth/supabase.md +33 -0
- package/.docs/reference/auth/workos.md +84 -0
- package/.docs/reference/cli/create-mastra.md +137 -0
- package/.docs/reference/cli/mastra.md +336 -0
- package/.docs/reference/client-js/agents.md +437 -0
- package/.docs/reference/client-js/error-handling.md +16 -0
- package/.docs/reference/client-js/logs.md +24 -0
- package/.docs/reference/client-js/mastra-client.md +63 -0
- package/.docs/reference/client-js/memory.md +221 -0
- package/.docs/reference/client-js/observability.md +72 -0
- package/.docs/reference/client-js/telemetry.md +20 -0
- package/.docs/reference/client-js/tools.md +44 -0
- package/.docs/reference/client-js/vectors.md +79 -0
- package/.docs/reference/client-js/workflows.md +199 -0
- package/.docs/reference/configuration.md +752 -0
- package/.docs/reference/core/addGateway.md +42 -0
- package/.docs/reference/core/getAgent.md +21 -0
- package/.docs/reference/core/getAgentById.md +21 -0
- package/.docs/reference/core/getDeployer.md +22 -0
- package/.docs/reference/core/getGateway.md +38 -0
- package/.docs/reference/core/getGatewayById.md +41 -0
- package/.docs/reference/core/getLogger.md +22 -0
- package/.docs/reference/core/getMCPServer.md +47 -0
- package/.docs/reference/core/getMCPServerById.md +55 -0
- package/.docs/reference/core/getMemory.md +50 -0
- package/.docs/reference/core/getScorer.md +54 -0
- package/.docs/reference/core/getScorerById.md +54 -0
- package/.docs/reference/core/getServer.md +22 -0
- package/.docs/reference/core/getStorage.md +22 -0
- package/.docs/reference/core/getStoredAgentById.md +89 -0
- package/.docs/reference/core/getTelemetry.md +22 -0
- package/.docs/reference/core/getVector.md +22 -0
- package/.docs/reference/core/getWorkflow.md +42 -0
- package/.docs/reference/core/listAgents.md +21 -0
- package/.docs/reference/core/listGateways.md +40 -0
- package/.docs/reference/core/listLogs.md +38 -0
- package/.docs/reference/core/listLogsByRunId.md +36 -0
- package/.docs/reference/core/listMCPServers.md +55 -0
- package/.docs/reference/core/listMemory.md +56 -0
- package/.docs/reference/core/listScorers.md +29 -0
- package/.docs/reference/core/listStoredAgents.md +93 -0
- package/.docs/reference/core/listVectors.md +22 -0
- package/.docs/reference/core/listWorkflows.md +21 -0
- package/.docs/reference/core/mastra-class.md +66 -0
- package/.docs/reference/core/mastra-model-gateway.md +153 -0
- package/.docs/reference/core/setLogger.md +26 -0
- package/.docs/reference/core/setStorage.md +27 -0
- package/.docs/reference/datasets/addItem.md +37 -0
- package/.docs/reference/datasets/addItems.md +35 -0
- package/.docs/reference/datasets/compareExperiments.md +52 -0
- package/.docs/reference/datasets/create.md +51 -0
- package/.docs/reference/datasets/dataset.md +82 -0
- package/.docs/reference/datasets/datasets-manager.md +94 -0
- package/.docs/reference/datasets/delete.md +25 -0
- package/.docs/reference/datasets/deleteExperiment.md +27 -0
- package/.docs/reference/datasets/deleteItem.md +27 -0
- package/.docs/reference/datasets/deleteItems.md +29 -0
- package/.docs/reference/datasets/get.md +31 -0
- package/.docs/reference/datasets/getDetails.md +47 -0
- package/.docs/reference/datasets/getExperiment.md +30 -0
- package/.docs/reference/datasets/getItem.md +33 -0
- package/.docs/reference/datasets/getItemHistory.md +31 -0
- package/.docs/reference/datasets/list.md +31 -0
- package/.docs/reference/datasets/listExperimentResults.md +39 -0
- package/.docs/reference/datasets/listExperiments.md +33 -0
- package/.docs/reference/datasets/listItems.md +46 -0
- package/.docs/reference/datasets/listVersions.md +33 -0
- package/.docs/reference/datasets/startExperiment.md +62 -0
- package/.docs/reference/datasets/startExperimentAsync.md +43 -0
- package/.docs/reference/datasets/update.md +48 -0
- package/.docs/reference/datasets/updateItem.md +38 -0
- package/.docs/reference/deployer/cloudflare.md +79 -0
- package/.docs/reference/deployer/netlify.md +80 -0
- package/.docs/reference/deployer/vercel.md +91 -0
- package/.docs/reference/deployer.md +100 -0
- package/.docs/reference/evals/answer-relevancy.md +105 -0
- package/.docs/reference/evals/answer-similarity.md +99 -0
- package/.docs/reference/evals/bias.md +120 -0
- package/.docs/reference/evals/completeness.md +136 -0
- package/.docs/reference/evals/content-similarity.md +101 -0
- package/.docs/reference/evals/context-precision.md +196 -0
- package/.docs/reference/evals/context-relevance.md +531 -0
- package/.docs/reference/evals/create-scorer.md +270 -0
- package/.docs/reference/evals/faithfulness.md +114 -0
- package/.docs/reference/evals/hallucination.md +213 -0
- package/.docs/reference/evals/keyword-coverage.md +128 -0
- package/.docs/reference/evals/mastra-scorer.md +123 -0
- package/.docs/reference/evals/noise-sensitivity.md +675 -0
- package/.docs/reference/evals/prompt-alignment.md +614 -0
- package/.docs/reference/evals/run-evals.md +179 -0
- package/.docs/reference/evals/scorer-utils.md +326 -0
- package/.docs/reference/evals/textual-difference.md +113 -0
- package/.docs/reference/evals/tone-consistency.md +119 -0
- package/.docs/reference/evals/tool-call-accuracy.md +533 -0
- package/.docs/reference/evals/toxicity.md +123 -0
- package/.docs/reference/harness/harness-class.md +708 -0
- package/.docs/reference/index.md +277 -0
- package/.docs/reference/logging/pino-logger.md +117 -0
- package/.docs/reference/mastra-code/createMastraCode.md +108 -0
- package/.docs/reference/memory/clone-utilities.md +199 -0
- package/.docs/reference/memory/cloneThread.md +130 -0
- package/.docs/reference/memory/createThread.md +68 -0
- package/.docs/reference/memory/deleteMessages.md +38 -0
- package/.docs/reference/memory/getThreadById.md +24 -0
- package/.docs/reference/memory/listThreads.md +145 -0
- package/.docs/reference/memory/memory-class.md +147 -0
- package/.docs/reference/memory/observational-memory.md +565 -0
- package/.docs/reference/memory/recall.md +91 -0
- package/.docs/reference/observability/tracing/bridges/otel.md +131 -0
- package/.docs/reference/observability/tracing/configuration.md +178 -0
- package/.docs/reference/observability/tracing/exporters/arize.md +141 -0
- package/.docs/reference/observability/tracing/exporters/braintrust.md +93 -0
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +163 -0
- package/.docs/reference/observability/tracing/exporters/console-exporter.md +138 -0
- package/.docs/reference/observability/tracing/exporters/datadog.md +116 -0
- package/.docs/reference/observability/tracing/exporters/default-exporter.md +174 -0
- package/.docs/reference/observability/tracing/exporters/laminar.md +78 -0
- package/.docs/reference/observability/tracing/exporters/langfuse.md +134 -0
- package/.docs/reference/observability/tracing/exporters/langsmith.md +108 -0
- package/.docs/reference/observability/tracing/exporters/otel.md +199 -0
- package/.docs/reference/observability/tracing/exporters/posthog.md +92 -0
- package/.docs/reference/observability/tracing/exporters/sentry.md +184 -0
- package/.docs/reference/observability/tracing/instances.md +107 -0
- package/.docs/reference/observability/tracing/interfaces.md +743 -0
- package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +144 -0
- package/.docs/reference/observability/tracing/spans.md +224 -0
- package/.docs/reference/processors/batch-parts-processor.md +61 -0
- package/.docs/reference/processors/language-detector.md +82 -0
- package/.docs/reference/processors/message-history-processor.md +85 -0
- package/.docs/reference/processors/moderation-processor.md +104 -0
- package/.docs/reference/processors/pii-detector.md +108 -0
- package/.docs/reference/processors/processor-interface.md +521 -0
- package/.docs/reference/processors/prompt-injection-detector.md +72 -0
- package/.docs/reference/processors/semantic-recall-processor.md +117 -0
- package/.docs/reference/processors/system-prompt-scrubber.md +80 -0
- package/.docs/reference/processors/token-limiter-processor.md +115 -0
- package/.docs/reference/processors/tool-call-filter.md +85 -0
- package/.docs/reference/processors/tool-search-processor.md +111 -0
- package/.docs/reference/processors/unicode-normalizer.md +62 -0
- package/.docs/reference/processors/working-memory-processor.md +152 -0
- package/.docs/reference/rag/chunk.md +221 -0
- package/.docs/reference/rag/database-config.md +261 -0
- package/.docs/reference/rag/document.md +114 -0
- package/.docs/reference/rag/embeddings.md +92 -0
- package/.docs/reference/rag/extract-params.md +168 -0
- package/.docs/reference/rag/graph-rag.md +111 -0
- package/.docs/reference/rag/metadata-filters.md +216 -0
- package/.docs/reference/rag/rerank.md +75 -0
- package/.docs/reference/rag/rerankWithScorer.md +80 -0
- package/.docs/reference/server/create-route.md +262 -0
- package/.docs/reference/server/express-adapter.md +176 -0
- package/.docs/reference/server/fastify-adapter.md +90 -0
- package/.docs/reference/server/hono-adapter.md +162 -0
- package/.docs/reference/server/koa-adapter.md +127 -0
- package/.docs/reference/server/mastra-server.md +298 -0
- package/.docs/reference/server/register-api-route.md +249 -0
- package/.docs/reference/server/routes.md +306 -0
- package/.docs/reference/storage/cloudflare-d1.md +218 -0
- package/.docs/reference/storage/cloudflare.md +88 -0
- package/.docs/reference/storage/composite.md +235 -0
- package/.docs/reference/storage/convex.md +161 -0
- package/.docs/reference/storage/dynamodb.md +282 -0
- package/.docs/reference/storage/lance.md +131 -0
- package/.docs/reference/storage/libsql.md +135 -0
- package/.docs/reference/storage/mongodb.md +262 -0
- package/.docs/reference/storage/mssql.md +157 -0
- package/.docs/reference/storage/overview.md +121 -0
- package/.docs/reference/storage/postgresql.md +526 -0
- package/.docs/reference/storage/upstash.md +160 -0
- package/.docs/reference/streaming/ChunkType.md +292 -0
- package/.docs/reference/streaming/agents/MastraModelOutput.md +182 -0
- package/.docs/reference/streaming/agents/stream.md +221 -0
- package/.docs/reference/streaming/agents/streamLegacy.md +142 -0
- package/.docs/reference/streaming/workflows/observeStream.md +42 -0
- package/.docs/reference/streaming/workflows/resumeStream.md +61 -0
- package/.docs/reference/streaming/workflows/stream.md +88 -0
- package/.docs/reference/streaming/workflows/timeTravelStream.md +142 -0
- package/.docs/reference/templates/overview.md +194 -0
- package/.docs/reference/tools/create-tool.md +237 -0
- package/.docs/reference/tools/document-chunker-tool.md +89 -0
- package/.docs/reference/tools/graph-rag-tool.md +182 -0
- package/.docs/reference/tools/mcp-client.md +954 -0
- package/.docs/reference/tools/mcp-server.md +1271 -0
- package/.docs/reference/tools/vector-query-tool.md +459 -0
- package/.docs/reference/vectors/astra.md +121 -0
- package/.docs/reference/vectors/chroma.md +264 -0
- package/.docs/reference/vectors/convex.md +300 -0
- package/.docs/reference/vectors/couchbase.md +226 -0
- package/.docs/reference/vectors/duckdb.md +318 -0
- package/.docs/reference/vectors/elasticsearch.md +189 -0
- package/.docs/reference/vectors/lance.md +220 -0
- package/.docs/reference/vectors/libsql.md +305 -0
- package/.docs/reference/vectors/mongodb.md +295 -0
- package/.docs/reference/vectors/opensearch.md +99 -0
- package/.docs/reference/vectors/pg.md +408 -0
- package/.docs/reference/vectors/pinecone.md +168 -0
- package/.docs/reference/vectors/qdrant.md +222 -0
- package/.docs/reference/vectors/s3vectors.md +277 -0
- package/.docs/reference/vectors/turbopuffer.md +157 -0
- package/.docs/reference/vectors/upstash.md +294 -0
- package/.docs/reference/vectors/vectorize.md +147 -0
- package/.docs/reference/voice/azure.md +148 -0
- package/.docs/reference/voice/cloudflare.md +83 -0
- package/.docs/reference/voice/composite-voice.md +121 -0
- package/.docs/reference/voice/deepgram.md +79 -0
- package/.docs/reference/voice/elevenlabs.md +98 -0
- package/.docs/reference/voice/google-gemini-live.md +378 -0
- package/.docs/reference/voice/google.md +228 -0
- package/.docs/reference/voice/mastra-voice.md +311 -0
- package/.docs/reference/voice/murf.md +122 -0
- package/.docs/reference/voice/openai-realtime.md +203 -0
- package/.docs/reference/voice/openai.md +88 -0
- package/.docs/reference/voice/playai.md +80 -0
- package/.docs/reference/voice/sarvam.md +126 -0
- package/.docs/reference/voice/speechify.md +75 -0
- package/.docs/reference/voice/voice.addInstructions.md +55 -0
- package/.docs/reference/voice/voice.addTools.md +67 -0
- package/.docs/reference/voice/voice.answer.md +54 -0
- package/.docs/reference/voice/voice.close.md +51 -0
- package/.docs/reference/voice/voice.connect.md +94 -0
- package/.docs/reference/voice/voice.events.md +37 -0
- package/.docs/reference/voice/voice.getSpeakers.md +129 -0
- package/.docs/reference/voice/voice.listen.md +164 -0
- package/.docs/reference/voice/voice.off.md +54 -0
- package/.docs/reference/voice/voice.on.md +111 -0
- package/.docs/reference/voice/voice.send.md +65 -0
- package/.docs/reference/voice/voice.speak.md +157 -0
- package/.docs/reference/voice/voice.updateConfig.md +60 -0
- package/.docs/reference/workflows/run-methods/cancel.md +86 -0
- package/.docs/reference/workflows/run-methods/restart.md +33 -0
- package/.docs/reference/workflows/run-methods/resume.md +59 -0
- package/.docs/reference/workflows/run-methods/start.md +58 -0
- package/.docs/reference/workflows/run-methods/startAsync.md +67 -0
- package/.docs/reference/workflows/run-methods/timeTravel.md +142 -0
- package/.docs/reference/workflows/run.md +59 -0
- package/.docs/reference/workflows/step.md +119 -0
- package/.docs/reference/workflows/workflow-methods/branch.md +25 -0
- package/.docs/reference/workflows/workflow-methods/commit.md +17 -0
- package/.docs/reference/workflows/workflow-methods/create-run.md +63 -0
- package/.docs/reference/workflows/workflow-methods/dountil.md +25 -0
- package/.docs/reference/workflows/workflow-methods/dowhile.md +25 -0
- package/.docs/reference/workflows/workflow-methods/foreach.md +118 -0
- package/.docs/reference/workflows/workflow-methods/map.md +93 -0
- package/.docs/reference/workflows/workflow-methods/parallel.md +21 -0
- package/.docs/reference/workflows/workflow-methods/sleep.md +35 -0
- package/.docs/reference/workflows/workflow-methods/sleepUntil.md +35 -0
- package/.docs/reference/workflows/workflow-methods/then.md +21 -0
- package/.docs/reference/workflows/workflow.md +157 -0
- package/.docs/reference/workspace/e2b-sandbox.md +289 -0
- package/.docs/reference/workspace/filesystem.md +255 -0
- package/.docs/reference/workspace/gcs-filesystem.md +174 -0
- package/.docs/reference/workspace/local-filesystem.md +343 -0
- package/.docs/reference/workspace/local-sandbox.md +301 -0
- package/.docs/reference/workspace/s3-filesystem.md +175 -0
- package/.docs/reference/workspace/sandbox.md +87 -0
- package/.docs/reference/workspace/workspace-class.md +244 -0
- package/CHANGELOG.md +8 -0
- package/package.json +5 -5
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# runEvals
|
|
2
|
+
|
|
3
|
+
The `runEvals` function enables batch evaluation of agents and workflows by running multiple test cases against scorers concurrently. This is essential for systematic testing, performance analysis, and validation of AI systems.
|
|
4
|
+
|
|
5
|
+
## Usage Example
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import { runEvals } from '@mastra/core/evals'
|
|
9
|
+
import { myAgent } from './agents/my-agent'
|
|
10
|
+
import { myScorer1, myScorer2 } from './scorers'
|
|
11
|
+
|
|
12
|
+
const result = await runEvals({
|
|
13
|
+
target: myAgent,
|
|
14
|
+
data: [
|
|
15
|
+
{ input: 'What is machine learning?' },
|
|
16
|
+
{ input: 'Explain neural networks' },
|
|
17
|
+
{ input: 'How does AI work?' },
|
|
18
|
+
],
|
|
19
|
+
scorers: [myScorer1, myScorer2],
|
|
20
|
+
targetOptions: { maxSteps: 5 },
|
|
21
|
+
concurrency: 2,
|
|
22
|
+
onItemComplete: ({ item, targetResult, scorerResults }) => {
|
|
23
|
+
console.log(`Completed: ${item.input}`)
|
|
24
|
+
console.log(`Scores:`, scorerResults)
|
|
25
|
+
},
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
console.log(`Average scores:`, result.scores)
|
|
29
|
+
console.log(`Processed ${result.summary.totalItems} items`)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Parameters
|
|
33
|
+
|
|
34
|
+
**target:** (`Agent | Workflow`): The agent or workflow to evaluate.
|
|
35
|
+
|
|
36
|
+
**data:** (`RunEvalsDataItem[]`): Array of test cases with input data and optional ground truth.
|
|
37
|
+
|
|
38
|
+
**scorers:** (`MastraScorer[] | WorkflowScorerConfig`): Array of scorers for agents, or configuration object for workflows specifying scorers for the workflow and individual steps.
|
|
39
|
+
|
|
40
|
+
**targetOptions?:** (`AgentExecutionOptions | WorkflowRunOptions`): Options forwarded to the target during execution. For agents: options passed to agent.generate() (e.g. maxSteps, modelSettings, instructions). For workflows: options passed to run.start() (e.g. perStep, outputOptions, initialState).
|
|
41
|
+
|
|
42
|
+
**concurrency?:** (`number`): Number of test cases to run concurrently. (Default: `1`)
|
|
43
|
+
|
|
44
|
+
**onItemComplete?:** (`function`): Callback function called after each test case completes. Receives item, target result, and scorer results.
|
|
45
|
+
|
|
46
|
+
## Data Item Structure
|
|
47
|
+
|
|
48
|
+
**input:** (`string | string[] | CoreMessage[] | any`): Input data for the target. For agents: messages or strings. For workflows: workflow input data.
|
|
49
|
+
|
|
50
|
+
**groundTruth?:** (`any`): Expected or reference output for comparison during scoring.
|
|
51
|
+
|
|
52
|
+
**requestContext?:** (`RequestContext`): Request context to pass to the target during execution.
|
|
53
|
+
|
|
54
|
+
**tracingContext?:** (`TracingContext`): Tracing context for observability and debugging.
|
|
55
|
+
|
|
56
|
+
**startOptions?:** (`WorkflowRunOptions`): Per-item workflow run options (e.g. initialState, perStep, outputOptions). Merged on top of targetOptions, so per-item values take precedence. Only applicable when the target is a workflow.
|
|
57
|
+
|
|
58
|
+
## Workflow Scorer Configuration
|
|
59
|
+
|
|
60
|
+
For workflows, you can specify scorers at different levels using `WorkflowScorerConfig`:
|
|
61
|
+
|
|
62
|
+
**workflow?:** (`MastraScorer[]`): Array of scorers to evaluate the entire workflow output.
|
|
63
|
+
|
|
64
|
+
**steps?:** (`Record<string, MastraScorer[]>`): Object mapping step IDs to arrays of scorers for evaluating individual step outputs.
|
|
65
|
+
|
|
66
|
+
## Returns
|
|
67
|
+
|
|
68
|
+
**scores:** (`Record<string, any>`): Average scores across all test cases, organized by scorer name.
|
|
69
|
+
|
|
70
|
+
**summary:** (`object`): Summary information about the experiment execution.
|
|
71
|
+
|
|
72
|
+
**summary.totalItems:** (`number`): Total number of test cases processed.
|
|
73
|
+
|
|
74
|
+
## Examples
|
|
75
|
+
|
|
76
|
+
### Agent Evaluation
|
|
77
|
+
|
|
78
|
+
```typescript
|
|
79
|
+
import { createScorer, runEvals } from '@mastra/core/evals'
|
|
80
|
+
|
|
81
|
+
const myScorer = createScorer({
|
|
82
|
+
id: 'my-scorer',
|
|
83
|
+
description: "Check if Agent's response contains ground truth",
|
|
84
|
+
type: 'agent',
|
|
85
|
+
}).generateScore(({ run }) => {
|
|
86
|
+
const response = run.output[0]?.content || ''
|
|
87
|
+
const expectedResponse = run.groundTruth
|
|
88
|
+
return response.includes(expectedResponse) ? 1 : 0
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
const result = await runEvals({
|
|
92
|
+
target: chatAgent,
|
|
93
|
+
data: [
|
|
94
|
+
{
|
|
95
|
+
input: 'What is AI?',
|
|
96
|
+
groundTruth: 'AI is a field of computer science that creates intelligent machines.',
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
input: 'How does machine learning work?',
|
|
100
|
+
groundTruth: 'Machine learning uses algorithms to learn patterns from data.',
|
|
101
|
+
},
|
|
102
|
+
],
|
|
103
|
+
scorers: [myScorer],
|
|
104
|
+
concurrency: 3,
|
|
105
|
+
})
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Agent with targetOptions
|
|
109
|
+
|
|
110
|
+
Pass execution options like `maxSteps` or `modelSettings` to customize agent behavior during evaluation:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
const result = await runEvals({
|
|
114
|
+
target: chatAgent,
|
|
115
|
+
data: [{ input: 'Summarize this article' }, { input: 'Translate to French' }],
|
|
116
|
+
scorers: [relevancyScorer],
|
|
117
|
+
targetOptions: {
|
|
118
|
+
maxSteps: 5,
|
|
119
|
+
modelSettings: { temperature: 0 },
|
|
120
|
+
},
|
|
121
|
+
})
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Workflow Evaluation
|
|
125
|
+
|
|
126
|
+
```typescript
|
|
127
|
+
const workflowResult = await runEvals({
|
|
128
|
+
target: myWorkflow,
|
|
129
|
+
data: [
|
|
130
|
+
{ input: { query: 'Process this data', priority: 'high' } },
|
|
131
|
+
{ input: { query: 'Another task', priority: 'low' } },
|
|
132
|
+
],
|
|
133
|
+
scorers: {
|
|
134
|
+
workflow: [outputQualityScorer],
|
|
135
|
+
steps: {
|
|
136
|
+
'validation-step': [validationScorer],
|
|
137
|
+
'processing-step': [processingScorer],
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
onItemComplete: ({ item, targetResult, scorerResults }) => {
|
|
141
|
+
console.log(`Workflow completed for: ${item.input.query}`)
|
|
142
|
+
if (scorerResults.workflow) {
|
|
143
|
+
console.log('Workflow scores:', scorerResults.workflow)
|
|
144
|
+
}
|
|
145
|
+
if (scorerResults.steps) {
|
|
146
|
+
console.log('Step scores:', scorerResults.steps)
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
})
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Workflow with per-item startOptions
|
|
153
|
+
|
|
154
|
+
Use `startOptions` on individual data items to customize each workflow run. Per-item values take precedence over `targetOptions`:
|
|
155
|
+
|
|
156
|
+
```typescript
|
|
157
|
+
const result = await runEvals({
|
|
158
|
+
target: myWorkflow,
|
|
159
|
+
data: [
|
|
160
|
+
{
|
|
161
|
+
input: { query: 'hello' },
|
|
162
|
+
startOptions: { initialState: { counter: 1 } },
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
input: { query: 'world' },
|
|
166
|
+
startOptions: { initialState: { counter: 2 } },
|
|
167
|
+
},
|
|
168
|
+
],
|
|
169
|
+
scorers: [outputQualityScorer],
|
|
170
|
+
targetOptions: { perStep: true },
|
|
171
|
+
})
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Related
|
|
175
|
+
|
|
176
|
+
- [createScorer()](https://mastra.ai/reference/evals/create-scorer) - Create custom scorers for experiments
|
|
177
|
+
- [MastraScorer](https://mastra.ai/reference/evals/mastra-scorer) - Learn about scorer structure and methods
|
|
178
|
+
- [Custom Scorers](https://mastra.ai/docs/evals/custom-scorers) - Guide to building evaluation logic
|
|
179
|
+
- [Scorers Overview](https://mastra.ai/docs/evals/overview) - Understanding scorer concepts
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
# Scorer Utils
|
|
2
|
+
|
|
3
|
+
Mastra provides utility functions to help extract and process data from scorer run inputs and outputs. These utilities are particularly useful in the `preprocess` step of custom scorers.
|
|
4
|
+
|
|
5
|
+
## Import
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import {
|
|
9
|
+
getAssistantMessageFromRunOutput,
|
|
10
|
+
getReasoningFromRunOutput,
|
|
11
|
+
getUserMessageFromRunInput,
|
|
12
|
+
getSystemMessagesFromRunInput,
|
|
13
|
+
getCombinedSystemPrompt,
|
|
14
|
+
extractToolCalls,
|
|
15
|
+
extractInputMessages,
|
|
16
|
+
extractAgentResponseMessages,
|
|
17
|
+
} from '@mastra/evals/scorers/utils'
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Message Extraction
|
|
21
|
+
|
|
22
|
+
### getAssistantMessageFromRunOutput
|
|
23
|
+
|
|
24
|
+
Extracts the text content from the first assistant message in the run output.
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
const scorer = createScorer({
|
|
28
|
+
id: 'my-scorer',
|
|
29
|
+
description: 'My scorer',
|
|
30
|
+
type: 'agent',
|
|
31
|
+
})
|
|
32
|
+
.preprocess(({ run }) => {
|
|
33
|
+
const response = getAssistantMessageFromRunOutput(run.output)
|
|
34
|
+
return { response }
|
|
35
|
+
})
|
|
36
|
+
.generateScore(({ results }) => {
|
|
37
|
+
return results.preprocessStepResult?.response ? 1 : 0
|
|
38
|
+
})
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**output?:** (`ScorerRunOutputForAgent`): The scorer run output (array of MastraDBMessage)
|
|
42
|
+
|
|
43
|
+
**Returns:** `string | undefined` - The assistant message text, or undefined if no assistant message is found.
|
|
44
|
+
|
|
45
|
+
### getUserMessageFromRunInput
|
|
46
|
+
|
|
47
|
+
Extracts the text content from the first user message in the run input.
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
.preprocess(({ run }) => {
|
|
51
|
+
const userMessage = getUserMessageFromRunInput(run.input);
|
|
52
|
+
return { userMessage };
|
|
53
|
+
})
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**input?:** (`ScorerRunInputForAgent`): The scorer run input containing input messages
|
|
57
|
+
|
|
58
|
+
**Returns:** `string | undefined` - The user message text, or undefined if no user message is found.
|
|
59
|
+
|
|
60
|
+
### extractInputMessages
|
|
61
|
+
|
|
62
|
+
Extracts text content from all input messages as an array.
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
.preprocess(({ run }) => {
|
|
66
|
+
const allUserMessages = extractInputMessages(run.input);
|
|
67
|
+
return { conversationHistory: allUserMessages.join("\n") };
|
|
68
|
+
})
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Returns:** `string[]` - Array of text strings from each input message.
|
|
72
|
+
|
|
73
|
+
### extractAgentResponseMessages
|
|
74
|
+
|
|
75
|
+
Extracts text content from all assistant response messages as an array.
|
|
76
|
+
|
|
77
|
+
```typescript
|
|
78
|
+
.preprocess(({ run }) => {
|
|
79
|
+
const allResponses = extractAgentResponseMessages(run.output);
|
|
80
|
+
return { allResponses };
|
|
81
|
+
})
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Returns:** `string[]` - Array of text strings from each assistant message.
|
|
85
|
+
|
|
86
|
+
## Reasoning Extraction
|
|
87
|
+
|
|
88
|
+
### getReasoningFromRunOutput
|
|
89
|
+
|
|
90
|
+
Extracts reasoning text from the run output. This is particularly useful when evaluating responses from reasoning models like `deepseek-reasoner` that produce chain-of-thought reasoning.
|
|
91
|
+
|
|
92
|
+
Reasoning can be stored in two places:
|
|
93
|
+
|
|
94
|
+
1. `content.reasoning` - a string field on the message content
|
|
95
|
+
2. `content.parts` - as parts with `type: 'reasoning'` containing `details`
|
|
96
|
+
|
|
97
|
+
```typescript
|
|
98
|
+
import {
|
|
99
|
+
getReasoningFromRunOutput,
|
|
100
|
+
getAssistantMessageFromRunOutput,
|
|
101
|
+
} from '@mastra/evals/scorers/utils'
|
|
102
|
+
|
|
103
|
+
const reasoningQualityScorer = createScorer({
|
|
104
|
+
id: 'reasoning-quality',
|
|
105
|
+
name: 'Reasoning Quality',
|
|
106
|
+
description: 'Evaluates the quality of model reasoning',
|
|
107
|
+
type: 'agent',
|
|
108
|
+
})
|
|
109
|
+
.preprocess(({ run }) => {
|
|
110
|
+
const reasoning = getReasoningFromRunOutput(run.output)
|
|
111
|
+
const response = getAssistantMessageFromRunOutput(run.output)
|
|
112
|
+
return { reasoning, response }
|
|
113
|
+
})
|
|
114
|
+
.analyze(({ results }) => {
|
|
115
|
+
const { reasoning } = results.preprocessStepResult || {}
|
|
116
|
+
return {
|
|
117
|
+
hasReasoning: !!reasoning,
|
|
118
|
+
reasoningLength: reasoning?.length || 0,
|
|
119
|
+
hasStepByStep: reasoning?.includes('step') || false,
|
|
120
|
+
}
|
|
121
|
+
})
|
|
122
|
+
.generateScore(({ results }) => {
|
|
123
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {}
|
|
124
|
+
if (!hasReasoning) return 0
|
|
125
|
+
// Score based on reasoning length (normalized to 0-1)
|
|
126
|
+
return Math.min(reasoningLength / 500, 1)
|
|
127
|
+
})
|
|
128
|
+
.generateReason(({ results, score }) => {
|
|
129
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {}
|
|
130
|
+
if (!hasReasoning) {
|
|
131
|
+
return 'No reasoning was provided by the model.'
|
|
132
|
+
}
|
|
133
|
+
return `Model provided ${reasoningLength} characters of reasoning. Score: ${score}`
|
|
134
|
+
})
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**output?:** (`ScorerRunOutputForAgent`): The scorer run output (array of MastraDBMessage)
|
|
138
|
+
|
|
139
|
+
**Returns:** `string | undefined` - The reasoning text, or undefined if no reasoning is present.
|
|
140
|
+
|
|
141
|
+
## System Message Extraction
|
|
142
|
+
|
|
143
|
+
### getSystemMessagesFromRunInput
|
|
144
|
+
|
|
145
|
+
Extracts all system messages from the run input, including both standard system messages and tagged system messages (specialized prompts like memory instructions).
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
.preprocess(({ run }) => {
|
|
149
|
+
const systemMessages = getSystemMessagesFromRunInput(run.input);
|
|
150
|
+
return {
|
|
151
|
+
systemPromptCount: systemMessages.length,
|
|
152
|
+
systemPrompts: systemMessages
|
|
153
|
+
};
|
|
154
|
+
})
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Returns:** `string[]` - Array of system message strings.
|
|
158
|
+
|
|
159
|
+
### getCombinedSystemPrompt
|
|
160
|
+
|
|
161
|
+
Combines all system messages into a single prompt string, joined with double newlines.
|
|
162
|
+
|
|
163
|
+
```typescript
|
|
164
|
+
.preprocess(({ run }) => {
|
|
165
|
+
const fullSystemPrompt = getCombinedSystemPrompt(run.input);
|
|
166
|
+
return { fullSystemPrompt };
|
|
167
|
+
})
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**Returns:** `string` - Combined system prompt string.
|
|
171
|
+
|
|
172
|
+
## Tool Call Extraction
|
|
173
|
+
|
|
174
|
+
### extractToolCalls
|
|
175
|
+
|
|
176
|
+
Extracts information about all tool calls from the run output, including tool names, call IDs, and their positions in the message array.
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
const toolUsageScorer = createScorer({
|
|
180
|
+
id: 'tool-usage',
|
|
181
|
+
description: 'Evaluates tool usage patterns',
|
|
182
|
+
type: 'agent',
|
|
183
|
+
})
|
|
184
|
+
.preprocess(({ run }) => {
|
|
185
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output)
|
|
186
|
+
return {
|
|
187
|
+
toolsUsed: tools,
|
|
188
|
+
toolCount: tools.length,
|
|
189
|
+
toolDetails: toolCallInfos,
|
|
190
|
+
}
|
|
191
|
+
})
|
|
192
|
+
.generateScore(({ results }) => {
|
|
193
|
+
const { toolCount } = results.preprocessStepResult || {}
|
|
194
|
+
// Score based on appropriate tool usage
|
|
195
|
+
return toolCount > 0 ? 1 : 0
|
|
196
|
+
})
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
**Returns:**
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
{
|
|
203
|
+
tools: string[]; // Array of tool names
|
|
204
|
+
toolCallInfos: ToolCallInfo[]; // Detailed tool call information
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Where `ToolCallInfo` is:
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
type ToolCallInfo = {
|
|
212
|
+
toolName: string // Name of the tool
|
|
213
|
+
toolCallId: string // Unique call identifier
|
|
214
|
+
messageIndex: number // Index in the output array
|
|
215
|
+
invocationIndex: number // Index within message's tool invocations
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Test Utilities
|
|
220
|
+
|
|
221
|
+
These utilities help create test data for scorer development.
|
|
222
|
+
|
|
223
|
+
### createTestMessage
|
|
224
|
+
|
|
225
|
+
Creates a `MastraDBMessage` object for testing purposes.
|
|
226
|
+
|
|
227
|
+
```typescript
|
|
228
|
+
import { createTestMessage } from '@mastra/evals/scorers/utils'
|
|
229
|
+
|
|
230
|
+
const userMessage = createTestMessage({
|
|
231
|
+
content: 'What is the weather?',
|
|
232
|
+
role: 'user',
|
|
233
|
+
})
|
|
234
|
+
|
|
235
|
+
const assistantMessage = createTestMessage({
|
|
236
|
+
content: 'The weather is sunny.',
|
|
237
|
+
role: 'assistant',
|
|
238
|
+
toolInvocations: [
|
|
239
|
+
{
|
|
240
|
+
toolCallId: 'call-1',
|
|
241
|
+
toolName: 'weatherTool',
|
|
242
|
+
args: { location: 'London' },
|
|
243
|
+
result: { temp: 20 },
|
|
244
|
+
state: 'result',
|
|
245
|
+
},
|
|
246
|
+
],
|
|
247
|
+
})
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### createAgentTestRun
|
|
251
|
+
|
|
252
|
+
Creates a complete test run object for testing scorers.
|
|
253
|
+
|
|
254
|
+
```typescript
|
|
255
|
+
import { createAgentTestRun, createTestMessage } from '@mastra/evals/scorers/utils'
|
|
256
|
+
|
|
257
|
+
const testRun = createAgentTestRun({
|
|
258
|
+
inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],
|
|
259
|
+
output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
// Run your scorer with the test data
|
|
263
|
+
const result = await myScorer.run({
|
|
264
|
+
input: testRun.input,
|
|
265
|
+
output: testRun.output,
|
|
266
|
+
})
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## Complete Example
|
|
270
|
+
|
|
271
|
+
Here's a complete example showing how to use multiple utilities together:
|
|
272
|
+
|
|
273
|
+
```typescript
|
|
274
|
+
import { createScorer } from '@mastra/core/evals'
|
|
275
|
+
import {
|
|
276
|
+
getAssistantMessageFromRunOutput,
|
|
277
|
+
getReasoningFromRunOutput,
|
|
278
|
+
getUserMessageFromRunInput,
|
|
279
|
+
getCombinedSystemPrompt,
|
|
280
|
+
extractToolCalls,
|
|
281
|
+
} from '@mastra/evals/scorers/utils'
|
|
282
|
+
|
|
283
|
+
const comprehensiveScorer = createScorer({
|
|
284
|
+
id: 'comprehensive-analysis',
|
|
285
|
+
name: 'Comprehensive Analysis',
|
|
286
|
+
description: 'Analyzes all aspects of an agent response',
|
|
287
|
+
type: 'agent',
|
|
288
|
+
})
|
|
289
|
+
.preprocess(({ run }) => {
|
|
290
|
+
// Extract all relevant data
|
|
291
|
+
const userMessage = getUserMessageFromRunInput(run.input)
|
|
292
|
+
const response = getAssistantMessageFromRunOutput(run.output)
|
|
293
|
+
const reasoning = getReasoningFromRunOutput(run.output)
|
|
294
|
+
const systemPrompt = getCombinedSystemPrompt(run.input)
|
|
295
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output)
|
|
296
|
+
|
|
297
|
+
return {
|
|
298
|
+
userMessage,
|
|
299
|
+
response,
|
|
300
|
+
reasoning,
|
|
301
|
+
systemPrompt,
|
|
302
|
+
toolsUsed: tools,
|
|
303
|
+
toolCount: tools.length,
|
|
304
|
+
}
|
|
305
|
+
})
|
|
306
|
+
.generateScore(({ results }) => {
|
|
307
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {}
|
|
308
|
+
|
|
309
|
+
let score = 0
|
|
310
|
+
if (response && response.length > 0) score += 0.4
|
|
311
|
+
if (reasoning) score += 0.3
|
|
312
|
+
if (toolCount > 0) score += 0.3
|
|
313
|
+
|
|
314
|
+
return score
|
|
315
|
+
})
|
|
316
|
+
.generateReason(({ results, score }) => {
|
|
317
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {}
|
|
318
|
+
|
|
319
|
+
const parts = []
|
|
320
|
+
if (response) parts.push('provided a response')
|
|
321
|
+
if (reasoning) parts.push('included reasoning')
|
|
322
|
+
if (toolCount > 0) parts.push(`used ${toolCount} tool(s)`)
|
|
323
|
+
|
|
324
|
+
return `Score: ${score}. The agent ${parts.join(', ')}.`
|
|
325
|
+
})
|
|
326
|
+
```
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Textual Difference Scorer
|
|
2
|
+
|
|
3
|
+
The `createTextualDifferenceScorer()` function uses sequence matching to measure the textual differences between two strings. It provides detailed information about changes, including the number of operations needed to transform one text into another.
|
|
4
|
+
|
|
5
|
+
## Parameters
|
|
6
|
+
|
|
7
|
+
The `createTextualDifferenceScorer()` function does not take any options.
|
|
8
|
+
|
|
9
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
|
+
|
|
11
|
+
## .run() Returns
|
|
12
|
+
|
|
13
|
+
**runId:** (`string`): The id of the run (optional).
|
|
14
|
+
|
|
15
|
+
**analyzeStepResult:** (`object`): Object with difference metrics: { confidence: number, ratio: number, changes: number, lengthDiff: number }
|
|
16
|
+
|
|
17
|
+
**score:** (`number`): Similarity ratio (0-1) where 1 indicates identical texts.
|
|
18
|
+
|
|
19
|
+
`.run()` returns a result in the following shape:
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
{
|
|
23
|
+
runId: string,
|
|
24
|
+
analyzeStepResult: {
|
|
25
|
+
confidence: number,
|
|
26
|
+
ratio: number,
|
|
27
|
+
changes: number,
|
|
28
|
+
lengthDiff: number
|
|
29
|
+
},
|
|
30
|
+
score: number
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Scoring Details
|
|
35
|
+
|
|
36
|
+
The scorer calculates several measures:
|
|
37
|
+
|
|
38
|
+
- **Similarity Ratio**: Based on sequence matching between texts (0-1)
|
|
39
|
+
- **Changes**: Count of non-matching operations needed
|
|
40
|
+
- **Length Difference**: Normalized difference in text lengths
|
|
41
|
+
- **Confidence**: Inversely proportional to length difference
|
|
42
|
+
|
|
43
|
+
### Scoring Process
|
|
44
|
+
|
|
45
|
+
1. Analyzes textual differences:
|
|
46
|
+
|
|
47
|
+
- Performs sequence matching between input and output
|
|
48
|
+
- Counts the number of change operations required
|
|
49
|
+
- Measures length differences
|
|
50
|
+
|
|
51
|
+
2. Calculates metrics:
|
|
52
|
+
|
|
53
|
+
- Computes similarity ratio
|
|
54
|
+
- Determines confidence score
|
|
55
|
+
- Combines into weighted score
|
|
56
|
+
|
|
57
|
+
Final score: `(similarity_ratio * confidence) * scale`
|
|
58
|
+
|
|
59
|
+
### Score interpretation
|
|
60
|
+
|
|
61
|
+
A textual difference score between 0 and 1:
|
|
62
|
+
|
|
63
|
+
- **1.0**: Identical texts – no differences detected.
|
|
64
|
+
- **0.7–0.9**: Minor differences – few changes needed.
|
|
65
|
+
- **0.4–0.6**: Moderate differences – noticeable changes required.
|
|
66
|
+
- **0.1–0.3**: Major differences – extensive changes needed.
|
|
67
|
+
- **0.0**: Completely different texts.
|
|
68
|
+
|
|
69
|
+
## Example
|
|
70
|
+
|
|
71
|
+
Measure textual differences between expected and actual agent outputs:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import { runEvals } from '@mastra/core/evals'
|
|
75
|
+
import { createTextualDifferenceScorer } from '@mastra/evals/scorers/prebuilt'
|
|
76
|
+
import { myAgent } from './agent'
|
|
77
|
+
|
|
78
|
+
const scorer = createTextualDifferenceScorer()
|
|
79
|
+
|
|
80
|
+
const result = await runEvals({
|
|
81
|
+
data: [
|
|
82
|
+
{
|
|
83
|
+
input: 'Summarize the concept of recursion',
|
|
84
|
+
groundTruth:
|
|
85
|
+
'Recursion is when a function calls itself to solve a problem by breaking it into smaller subproblems.',
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
input: 'What is the capital of France?',
|
|
89
|
+
groundTruth: 'The capital of France is Paris.',
|
|
90
|
+
},
|
|
91
|
+
],
|
|
92
|
+
scorers: [scorer],
|
|
93
|
+
target: myAgent,
|
|
94
|
+
onItemComplete: ({ scorerResults }) => {
|
|
95
|
+
console.log({
|
|
96
|
+
score: scorerResults[scorer.id].score,
|
|
97
|
+
groundTruth: scorerResults[scorer.id].groundTruth,
|
|
98
|
+
})
|
|
99
|
+
},
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
console.log(result.scores)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
|
|
106
|
+
|
|
107
|
+
To add this scorer to an agent, see the [Scorers overview](https://mastra.ai/docs/evals/overview) guide.
|
|
108
|
+
|
|
109
|
+
## Related
|
|
110
|
+
|
|
111
|
+
- [Content Similarity Scorer](https://mastra.ai/reference/evals/content-similarity)
|
|
112
|
+
- [Completeness Scorer](https://mastra.ai/reference/evals/completeness)
|
|
113
|
+
- [Keyword Coverage Scorer](https://mastra.ai/reference/evals/keyword-coverage)
|