@mastra/mcp-docs-server 1.1.5 → 1.1.6-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/adding-voice.md +349 -0
- package/.docs/docs/agents/agent-approval.md +558 -0
- package/.docs/docs/agents/agent-memory.md +209 -0
- package/.docs/docs/agents/guardrails.md +374 -0
- package/.docs/docs/agents/network-approval.md +275 -0
- package/.docs/docs/agents/networks.md +299 -0
- package/.docs/docs/agents/overview.md +304 -0
- package/.docs/docs/agents/processors.md +622 -0
- package/.docs/docs/agents/structured-output.md +273 -0
- package/.docs/docs/agents/supervisor-agents.md +304 -0
- package/.docs/docs/agents/using-tools.md +214 -0
- package/.docs/docs/build-with-ai/mcp-docs-server.md +238 -0
- package/.docs/docs/build-with-ai/skills.md +35 -0
- package/.docs/docs/community/contributing-templates.md +3 -0
- package/.docs/docs/community/discord.md +9 -0
- package/.docs/docs/community/licensing.md +66 -0
- package/.docs/docs/deployment/cloud-providers.md +15 -0
- package/.docs/docs/deployment/mastra-server.md +122 -0
- package/.docs/docs/deployment/monorepo.md +142 -0
- package/.docs/docs/deployment/overview.md +62 -0
- package/.docs/docs/deployment/studio.md +239 -0
- package/.docs/docs/deployment/web-framework.md +52 -0
- package/.docs/docs/deployment/workflow-runners.md +9 -0
- package/.docs/docs/evals/built-in-scorers.md +47 -0
- package/.docs/docs/evals/custom-scorers.md +519 -0
- package/.docs/docs/evals/overview.md +141 -0
- package/.docs/docs/evals/running-in-ci.md +124 -0
- package/.docs/docs/getting-started/build-with-ai.md +68 -0
- package/.docs/docs/getting-started/manual-install.md +226 -0
- package/.docs/docs/getting-started/project-structure.md +60 -0
- package/.docs/docs/getting-started/start.md +28 -0
- package/.docs/docs/getting-started/studio.md +101 -0
- package/.docs/docs/index.md +43 -0
- package/.docs/docs/mastra-cloud/deployment.md +77 -0
- package/.docs/docs/mastra-cloud/observability.md +38 -0
- package/.docs/docs/mastra-cloud/overview.md +23 -0
- package/.docs/docs/mastra-cloud/setup.md +42 -0
- package/.docs/docs/mastra-cloud/studio.md +24 -0
- package/.docs/docs/mastra-code/configuration.md +299 -0
- package/.docs/docs/mastra-code/customization.md +228 -0
- package/.docs/docs/mastra-code/modes.md +104 -0
- package/.docs/docs/mastra-code/overview.md +135 -0
- package/.docs/docs/mastra-code/tools.md +229 -0
- package/.docs/docs/mcp/overview.md +373 -0
- package/.docs/docs/mcp/publishing-mcp-server.md +95 -0
- package/.docs/docs/memory/memory-processors.md +314 -0
- package/.docs/docs/memory/message-history.md +260 -0
- package/.docs/docs/memory/observational-memory.md +248 -0
- package/.docs/docs/memory/overview.md +45 -0
- package/.docs/docs/memory/semantic-recall.md +272 -0
- package/.docs/docs/memory/storage.md +261 -0
- package/.docs/docs/memory/working-memory.md +400 -0
- package/.docs/docs/observability/datasets/overview.md +198 -0
- package/.docs/docs/observability/datasets/running-experiments.md +274 -0
- package/.docs/docs/observability/logging.md +99 -0
- package/.docs/docs/observability/overview.md +70 -0
- package/.docs/docs/observability/tracing/bridges/otel.md +209 -0
- package/.docs/docs/observability/tracing/exporters/arize.md +272 -0
- package/.docs/docs/observability/tracing/exporters/braintrust.md +111 -0
- package/.docs/docs/observability/tracing/exporters/cloud.md +127 -0
- package/.docs/docs/observability/tracing/exporters/datadog.md +187 -0
- package/.docs/docs/observability/tracing/exporters/default.md +209 -0
- package/.docs/docs/observability/tracing/exporters/laminar.md +100 -0
- package/.docs/docs/observability/tracing/exporters/langfuse.md +213 -0
- package/.docs/docs/observability/tracing/exporters/langsmith.md +198 -0
- package/.docs/docs/observability/tracing/exporters/otel.md +476 -0
- package/.docs/docs/observability/tracing/exporters/posthog.md +148 -0
- package/.docs/docs/observability/tracing/exporters/sentry.md +208 -0
- package/.docs/docs/observability/tracing/overview.md +1112 -0
- package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +300 -0
- package/.docs/docs/rag/chunking-and-embedding.md +183 -0
- package/.docs/docs/rag/graph-rag.md +215 -0
- package/.docs/docs/rag/overview.md +72 -0
- package/.docs/docs/rag/retrieval.md +515 -0
- package/.docs/docs/rag/vector-databases.md +645 -0
- package/.docs/docs/server/auth/auth0.md +220 -0
- package/.docs/docs/server/auth/better-auth.md +203 -0
- package/.docs/docs/server/auth/clerk.md +132 -0
- package/.docs/docs/server/auth/composite-auth.md +234 -0
- package/.docs/docs/server/auth/custom-auth-provider.md +513 -0
- package/.docs/docs/server/auth/firebase.md +272 -0
- package/.docs/docs/server/auth/jwt.md +110 -0
- package/.docs/docs/server/auth/simple-auth.md +180 -0
- package/.docs/docs/server/auth/supabase.md +117 -0
- package/.docs/docs/server/auth/workos.md +186 -0
- package/.docs/docs/server/auth.md +38 -0
- package/.docs/docs/server/custom-adapters.md +378 -0
- package/.docs/docs/server/custom-api-routes.md +267 -0
- package/.docs/docs/server/mastra-client.md +243 -0
- package/.docs/docs/server/mastra-server.md +71 -0
- package/.docs/docs/server/middleware.md +225 -0
- package/.docs/docs/server/request-context.md +471 -0
- package/.docs/docs/server/server-adapters.md +547 -0
- package/.docs/docs/streaming/events.md +237 -0
- package/.docs/docs/streaming/overview.md +175 -0
- package/.docs/docs/streaming/tool-streaming.md +175 -0
- package/.docs/docs/streaming/workflow-streaming.md +109 -0
- package/.docs/docs/voice/overview.md +959 -0
- package/.docs/docs/voice/speech-to-speech.md +102 -0
- package/.docs/docs/voice/speech-to-text.md +79 -0
- package/.docs/docs/voice/text-to-speech.md +83 -0
- package/.docs/docs/workflows/agents-and-tools.md +166 -0
- package/.docs/docs/workflows/control-flow.md +822 -0
- package/.docs/docs/workflows/error-handling.md +360 -0
- package/.docs/docs/workflows/human-in-the-loop.md +215 -0
- package/.docs/docs/workflows/overview.md +370 -0
- package/.docs/docs/workflows/snapshots.md +238 -0
- package/.docs/docs/workflows/suspend-and-resume.md +205 -0
- package/.docs/docs/workflows/time-travel.md +309 -0
- package/.docs/docs/workflows/workflow-state.md +181 -0
- package/.docs/docs/workspace/filesystem.md +164 -0
- package/.docs/docs/workspace/overview.md +239 -0
- package/.docs/docs/workspace/sandbox.md +63 -0
- package/.docs/docs/workspace/search.md +243 -0
- package/.docs/docs/workspace/skills.md +169 -0
- package/.docs/guides/agent-frameworks/ai-sdk.md +140 -0
- package/.docs/guides/build-your-ui/ai-sdk-ui.md +1499 -0
- package/.docs/guides/build-your-ui/assistant-ui.md +156 -0
- package/.docs/guides/build-your-ui/copilotkit.md +289 -0
- package/.docs/guides/deployment/amazon-ec2.md +130 -0
- package/.docs/guides/deployment/aws-lambda.md +248 -0
- package/.docs/guides/deployment/azure-app-services.md +114 -0
- package/.docs/guides/deployment/cloudflare.md +99 -0
- package/.docs/guides/deployment/digital-ocean.md +168 -0
- package/.docs/guides/deployment/inngest.md +682 -0
- package/.docs/guides/deployment/netlify.md +77 -0
- package/.docs/guides/deployment/vercel.md +101 -0
- package/.docs/guides/getting-started/astro.md +398 -0
- package/.docs/guides/getting-started/electron.md +504 -0
- package/.docs/guides/getting-started/express.md +251 -0
- package/.docs/guides/getting-started/hono.md +190 -0
- package/.docs/guides/getting-started/next-js.md +347 -0
- package/.docs/guides/getting-started/nuxt.md +497 -0
- package/.docs/guides/getting-started/quickstart.md +67 -0
- package/.docs/guides/getting-started/sveltekit.md +296 -0
- package/.docs/guides/getting-started/vite-react.md +425 -0
- package/.docs/guides/guide/ai-recruiter.md +226 -0
- package/.docs/guides/guide/chef-michel.md +211 -0
- package/.docs/guides/guide/code-review-bot.md +226 -0
- package/.docs/guides/guide/dev-assistant.md +307 -0
- package/.docs/guides/guide/docs-manager.md +238 -0
- package/.docs/guides/guide/github-actions-pr-description.md +236 -0
- package/.docs/guides/guide/notes-mcp-server.md +416 -0
- package/.docs/guides/guide/research-assistant.md +348 -0
- package/.docs/guides/guide/research-coordinator.md +416 -0
- package/.docs/guides/guide/stock-agent.md +132 -0
- package/.docs/guides/guide/web-search.md +320 -0
- package/.docs/guides/guide/whatsapp-chat-bot.md +405 -0
- package/.docs/guides/index.md +3 -0
- package/.docs/guides/migrations/agentnetwork.md +97 -0
- package/.docs/guides/migrations/ai-sdk-v4-to-v5.md +112 -0
- package/.docs/guides/migrations/network-to-supervisor.md +261 -0
- package/.docs/guides/migrations/upgrade-to-v1/agent.md +404 -0
- package/.docs/guides/migrations/upgrade-to-v1/cli.md +57 -0
- package/.docs/guides/migrations/upgrade-to-v1/client.md +337 -0
- package/.docs/guides/migrations/upgrade-to-v1/deployment.md +37 -0
- package/.docs/guides/migrations/upgrade-to-v1/evals.md +239 -0
- package/.docs/guides/migrations/upgrade-to-v1/mastra.md +143 -0
- package/.docs/guides/migrations/upgrade-to-v1/mcp.md +97 -0
- package/.docs/guides/migrations/upgrade-to-v1/memory.md +285 -0
- package/.docs/guides/migrations/upgrade-to-v1/overview.md +119 -0
- package/.docs/guides/migrations/upgrade-to-v1/processors.md +68 -0
- package/.docs/guides/migrations/upgrade-to-v1/rag.md +42 -0
- package/.docs/guides/migrations/upgrade-to-v1/storage.md +553 -0
- package/.docs/guides/migrations/upgrade-to-v1/tools.md +180 -0
- package/.docs/guides/migrations/upgrade-to-v1/tracing.md +412 -0
- package/.docs/guides/migrations/upgrade-to-v1/vectors.md +87 -0
- package/.docs/guides/migrations/upgrade-to-v1/voice.md +30 -0
- package/.docs/guides/migrations/upgrade-to-v1/workflows.md +341 -0
- package/.docs/guides/migrations/vnext-to-standard-apis.md +362 -0
- package/.docs/models/embeddings.md +161 -0
- package/.docs/models/gateways/azure-openai.md +128 -0
- package/.docs/models/gateways/custom-gateways.md +545 -0
- package/.docs/models/gateways/netlify.md +88 -0
- package/.docs/models/gateways/openrouter.md +219 -0
- package/.docs/models/gateways/vercel.md +225 -0
- package/.docs/models/gateways.md +14 -0
- package/.docs/models/index.md +286 -0
- package/.docs/models/providers/302ai.md +134 -0
- package/.docs/models/providers/abacus.md +125 -0
- package/.docs/models/providers/agentrouter.md +90 -0
- package/.docs/models/providers/aihubmix.md +107 -0
- package/.docs/models/providers/alibaba-cn.md +135 -0
- package/.docs/models/providers/alibaba.md +111 -0
- package/.docs/models/providers/amazon-bedrock.md +33 -0
- package/.docs/models/providers/anthropic.md +153 -0
- package/.docs/models/providers/azure.md +33 -0
- package/.docs/models/providers/bailing.md +72 -0
- package/.docs/models/providers/baseten.md +77 -0
- package/.docs/models/providers/berget.md +78 -0
- package/.docs/models/providers/cerebras.md +101 -0
- package/.docs/models/providers/chutes.md +136 -0
- package/.docs/models/providers/cloudflare-ai-gateway.md +33 -0
- package/.docs/models/providers/cloudflare-workers-ai.md +109 -0
- package/.docs/models/providers/cohere.md +33 -0
- package/.docs/models/providers/cortecs.md +91 -0
- package/.docs/models/providers/deepinfra.md +112 -0
- package/.docs/models/providers/deepseek.md +88 -0
- package/.docs/models/providers/fastrouter.md +84 -0
- package/.docs/models/providers/fireworks-ai.md +89 -0
- package/.docs/models/providers/firmware.md +85 -0
- package/.docs/models/providers/friendli.md +78 -0
- package/.docs/models/providers/github-models.md +125 -0
- package/.docs/models/providers/google-vertex.md +33 -0
- package/.docs/models/providers/google.md +159 -0
- package/.docs/models/providers/groq.md +107 -0
- package/.docs/models/providers/helicone.md +161 -0
- package/.docs/models/providers/huggingface.md +90 -0
- package/.docs/models/providers/iflowcn.md +84 -0
- package/.docs/models/providers/inception.md +72 -0
- package/.docs/models/providers/inference.md +79 -0
- package/.docs/models/providers/io-intelligence.md +87 -0
- package/.docs/models/providers/io-net.md +87 -0
- package/.docs/models/providers/jiekou.md +131 -0
- package/.docs/models/providers/kilo.md +333 -0
- package/.docs/models/providers/kimi-for-coding.md +100 -0
- package/.docs/models/providers/kuae-cloud-coding-plan.md +71 -0
- package/.docs/models/providers/llama.md +77 -0
- package/.docs/models/providers/lmstudio.md +73 -0
- package/.docs/models/providers/lucidquery.md +72 -0
- package/.docs/models/providers/minimax-cn-coding-plan.md +102 -0
- package/.docs/models/providers/minimax-cn.md +102 -0
- package/.docs/models/providers/minimax-coding-plan.md +102 -0
- package/.docs/models/providers/minimax.md +104 -0
- package/.docs/models/providers/mistral.md +124 -0
- package/.docs/models/providers/moark.md +72 -0
- package/.docs/models/providers/modelscope.md +77 -0
- package/.docs/models/providers/moonshotai-cn.md +76 -0
- package/.docs/models/providers/moonshotai.md +76 -0
- package/.docs/models/providers/morph.md +73 -0
- package/.docs/models/providers/nano-gpt.md +103 -0
- package/.docs/models/providers/nebius.md +116 -0
- package/.docs/models/providers/nova.md +72 -0
- package/.docs/models/providers/novita-ai.md +154 -0
- package/.docs/models/providers/nvidia.md +141 -0
- package/.docs/models/providers/ollama-cloud.md +103 -0
- package/.docs/models/providers/ollama.md +33 -0
- package/.docs/models/providers/openai.md +193 -0
- package/.docs/models/providers/opencode.md +100 -0
- package/.docs/models/providers/ovhcloud.md +83 -0
- package/.docs/models/providers/perplexity.md +100 -0
- package/.docs/models/providers/poe.md +183 -0
- package/.docs/models/providers/privatemode-ai.md +75 -0
- package/.docs/models/providers/requesty.md +90 -0
- package/.docs/models/providers/scaleway.md +84 -0
- package/.docs/models/providers/siliconflow-cn.md +138 -0
- package/.docs/models/providers/siliconflow.md +140 -0
- package/.docs/models/providers/stackit.md +78 -0
- package/.docs/models/providers/stepfun.md +73 -0
- package/.docs/models/providers/submodel.md +79 -0
- package/.docs/models/providers/synthetic.md +96 -0
- package/.docs/models/providers/togetherai.md +115 -0
- package/.docs/models/providers/upstage.md +73 -0
- package/.docs/models/providers/venice.md +95 -0
- package/.docs/models/providers/vivgrid.md +106 -0
- package/.docs/models/providers/vultr.md +75 -0
- package/.docs/models/providers/wandb.md +80 -0
- package/.docs/models/providers/xai.md +141 -0
- package/.docs/models/providers/xiaomi.md +71 -0
- package/.docs/models/providers/zai-coding-plan.md +80 -0
- package/.docs/models/providers/zai.md +79 -0
- package/.docs/models/providers/zenmux.md +161 -0
- package/.docs/models/providers/zhipuai-coding-plan.md +79 -0
- package/.docs/models/providers/zhipuai.md +79 -0
- package/.docs/models/providers.md +81 -0
- package/.docs/reference/agents/agent.md +141 -0
- package/.docs/reference/agents/generate.md +186 -0
- package/.docs/reference/agents/generateLegacy.md +173 -0
- package/.docs/reference/agents/getDefaultGenerateOptions.md +36 -0
- package/.docs/reference/agents/getDefaultOptions.md +34 -0
- package/.docs/reference/agents/getDefaultStreamOptions.md +36 -0
- package/.docs/reference/agents/getDescription.md +21 -0
- package/.docs/reference/agents/getInstructions.md +34 -0
- package/.docs/reference/agents/getLLM.md +37 -0
- package/.docs/reference/agents/getMemory.md +34 -0
- package/.docs/reference/agents/getModel.md +34 -0
- package/.docs/reference/agents/getTools.md +29 -0
- package/.docs/reference/agents/getVoice.md +34 -0
- package/.docs/reference/agents/listAgents.md +35 -0
- package/.docs/reference/agents/listScorers.md +34 -0
- package/.docs/reference/agents/listTools.md +34 -0
- package/.docs/reference/agents/listWorkflows.md +34 -0
- package/.docs/reference/agents/network.md +133 -0
- package/.docs/reference/ai-sdk/chat-route.md +82 -0
- package/.docs/reference/ai-sdk/handle-chat-stream.md +53 -0
- package/.docs/reference/ai-sdk/handle-network-stream.md +37 -0
- package/.docs/reference/ai-sdk/handle-workflow-stream.md +55 -0
- package/.docs/reference/ai-sdk/network-route.md +74 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +231 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-v4-messages.md +79 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-v5-messages.md +76 -0
- package/.docs/reference/ai-sdk/with-mastra.md +59 -0
- package/.docs/reference/ai-sdk/workflow-route.md +79 -0
- package/.docs/reference/auth/auth0.md +73 -0
- package/.docs/reference/auth/better-auth.md +71 -0
- package/.docs/reference/auth/clerk.md +36 -0
- package/.docs/reference/auth/firebase.md +80 -0
- package/.docs/reference/auth/jwt.md +26 -0
- package/.docs/reference/auth/supabase.md +33 -0
- package/.docs/reference/auth/workos.md +84 -0
- package/.docs/reference/cli/create-mastra.md +137 -0
- package/.docs/reference/cli/mastra.md +336 -0
- package/.docs/reference/client-js/agents.md +437 -0
- package/.docs/reference/client-js/error-handling.md +16 -0
- package/.docs/reference/client-js/logs.md +24 -0
- package/.docs/reference/client-js/mastra-client.md +63 -0
- package/.docs/reference/client-js/memory.md +221 -0
- package/.docs/reference/client-js/observability.md +72 -0
- package/.docs/reference/client-js/telemetry.md +20 -0
- package/.docs/reference/client-js/tools.md +44 -0
- package/.docs/reference/client-js/vectors.md +79 -0
- package/.docs/reference/client-js/workflows.md +199 -0
- package/.docs/reference/configuration.md +752 -0
- package/.docs/reference/core/addGateway.md +42 -0
- package/.docs/reference/core/getAgent.md +21 -0
- package/.docs/reference/core/getAgentById.md +21 -0
- package/.docs/reference/core/getDeployer.md +22 -0
- package/.docs/reference/core/getGateway.md +38 -0
- package/.docs/reference/core/getGatewayById.md +41 -0
- package/.docs/reference/core/getLogger.md +22 -0
- package/.docs/reference/core/getMCPServer.md +47 -0
- package/.docs/reference/core/getMCPServerById.md +55 -0
- package/.docs/reference/core/getMemory.md +50 -0
- package/.docs/reference/core/getScorer.md +54 -0
- package/.docs/reference/core/getScorerById.md +54 -0
- package/.docs/reference/core/getServer.md +22 -0
- package/.docs/reference/core/getStorage.md +22 -0
- package/.docs/reference/core/getStoredAgentById.md +89 -0
- package/.docs/reference/core/getTelemetry.md +22 -0
- package/.docs/reference/core/getVector.md +22 -0
- package/.docs/reference/core/getWorkflow.md +42 -0
- package/.docs/reference/core/listAgents.md +21 -0
- package/.docs/reference/core/listGateways.md +40 -0
- package/.docs/reference/core/listLogs.md +38 -0
- package/.docs/reference/core/listLogsByRunId.md +36 -0
- package/.docs/reference/core/listMCPServers.md +55 -0
- package/.docs/reference/core/listMemory.md +56 -0
- package/.docs/reference/core/listScorers.md +29 -0
- package/.docs/reference/core/listStoredAgents.md +93 -0
- package/.docs/reference/core/listVectors.md +22 -0
- package/.docs/reference/core/listWorkflows.md +21 -0
- package/.docs/reference/core/mastra-class.md +66 -0
- package/.docs/reference/core/mastra-model-gateway.md +153 -0
- package/.docs/reference/core/setLogger.md +26 -0
- package/.docs/reference/core/setStorage.md +27 -0
- package/.docs/reference/datasets/addItem.md +37 -0
- package/.docs/reference/datasets/addItems.md +35 -0
- package/.docs/reference/datasets/compareExperiments.md +52 -0
- package/.docs/reference/datasets/create.md +51 -0
- package/.docs/reference/datasets/dataset.md +82 -0
- package/.docs/reference/datasets/datasets-manager.md +94 -0
- package/.docs/reference/datasets/delete.md +25 -0
- package/.docs/reference/datasets/deleteExperiment.md +27 -0
- package/.docs/reference/datasets/deleteItem.md +27 -0
- package/.docs/reference/datasets/deleteItems.md +29 -0
- package/.docs/reference/datasets/get.md +31 -0
- package/.docs/reference/datasets/getDetails.md +47 -0
- package/.docs/reference/datasets/getExperiment.md +30 -0
- package/.docs/reference/datasets/getItem.md +33 -0
- package/.docs/reference/datasets/getItemHistory.md +31 -0
- package/.docs/reference/datasets/list.md +31 -0
- package/.docs/reference/datasets/listExperimentResults.md +39 -0
- package/.docs/reference/datasets/listExperiments.md +33 -0
- package/.docs/reference/datasets/listItems.md +46 -0
- package/.docs/reference/datasets/listVersions.md +33 -0
- package/.docs/reference/datasets/startExperiment.md +62 -0
- package/.docs/reference/datasets/startExperimentAsync.md +43 -0
- package/.docs/reference/datasets/update.md +48 -0
- package/.docs/reference/datasets/updateItem.md +38 -0
- package/.docs/reference/deployer/cloudflare.md +79 -0
- package/.docs/reference/deployer/netlify.md +80 -0
- package/.docs/reference/deployer/vercel.md +91 -0
- package/.docs/reference/deployer.md +100 -0
- package/.docs/reference/evals/answer-relevancy.md +105 -0
- package/.docs/reference/evals/answer-similarity.md +99 -0
- package/.docs/reference/evals/bias.md +120 -0
- package/.docs/reference/evals/completeness.md +136 -0
- package/.docs/reference/evals/content-similarity.md +101 -0
- package/.docs/reference/evals/context-precision.md +196 -0
- package/.docs/reference/evals/context-relevance.md +531 -0
- package/.docs/reference/evals/create-scorer.md +270 -0
- package/.docs/reference/evals/faithfulness.md +114 -0
- package/.docs/reference/evals/hallucination.md +213 -0
- package/.docs/reference/evals/keyword-coverage.md +128 -0
- package/.docs/reference/evals/mastra-scorer.md +123 -0
- package/.docs/reference/evals/noise-sensitivity.md +675 -0
- package/.docs/reference/evals/prompt-alignment.md +614 -0
- package/.docs/reference/evals/run-evals.md +179 -0
- package/.docs/reference/evals/scorer-utils.md +326 -0
- package/.docs/reference/evals/textual-difference.md +113 -0
- package/.docs/reference/evals/tone-consistency.md +119 -0
- package/.docs/reference/evals/tool-call-accuracy.md +533 -0
- package/.docs/reference/evals/toxicity.md +123 -0
- package/.docs/reference/harness/harness-class.md +708 -0
- package/.docs/reference/index.md +277 -0
- package/.docs/reference/logging/pino-logger.md +117 -0
- package/.docs/reference/mastra-code/createMastraCode.md +108 -0
- package/.docs/reference/memory/clone-utilities.md +199 -0
- package/.docs/reference/memory/cloneThread.md +130 -0
- package/.docs/reference/memory/createThread.md +68 -0
- package/.docs/reference/memory/deleteMessages.md +38 -0
- package/.docs/reference/memory/getThreadById.md +24 -0
- package/.docs/reference/memory/listThreads.md +145 -0
- package/.docs/reference/memory/memory-class.md +147 -0
- package/.docs/reference/memory/observational-memory.md +565 -0
- package/.docs/reference/memory/recall.md +91 -0
- package/.docs/reference/observability/tracing/bridges/otel.md +131 -0
- package/.docs/reference/observability/tracing/configuration.md +178 -0
- package/.docs/reference/observability/tracing/exporters/arize.md +141 -0
- package/.docs/reference/observability/tracing/exporters/braintrust.md +93 -0
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +163 -0
- package/.docs/reference/observability/tracing/exporters/console-exporter.md +138 -0
- package/.docs/reference/observability/tracing/exporters/datadog.md +116 -0
- package/.docs/reference/observability/tracing/exporters/default-exporter.md +174 -0
- package/.docs/reference/observability/tracing/exporters/laminar.md +78 -0
- package/.docs/reference/observability/tracing/exporters/langfuse.md +134 -0
- package/.docs/reference/observability/tracing/exporters/langsmith.md +108 -0
- package/.docs/reference/observability/tracing/exporters/otel.md +199 -0
- package/.docs/reference/observability/tracing/exporters/posthog.md +92 -0
- package/.docs/reference/observability/tracing/exporters/sentry.md +184 -0
- package/.docs/reference/observability/tracing/instances.md +107 -0
- package/.docs/reference/observability/tracing/interfaces.md +743 -0
- package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +144 -0
- package/.docs/reference/observability/tracing/spans.md +224 -0
- package/.docs/reference/processors/batch-parts-processor.md +61 -0
- package/.docs/reference/processors/language-detector.md +82 -0
- package/.docs/reference/processors/message-history-processor.md +85 -0
- package/.docs/reference/processors/moderation-processor.md +104 -0
- package/.docs/reference/processors/pii-detector.md +108 -0
- package/.docs/reference/processors/processor-interface.md +521 -0
- package/.docs/reference/processors/prompt-injection-detector.md +72 -0
- package/.docs/reference/processors/semantic-recall-processor.md +117 -0
- package/.docs/reference/processors/system-prompt-scrubber.md +80 -0
- package/.docs/reference/processors/token-limiter-processor.md +115 -0
- package/.docs/reference/processors/tool-call-filter.md +85 -0
- package/.docs/reference/processors/tool-search-processor.md +111 -0
- package/.docs/reference/processors/unicode-normalizer.md +62 -0
- package/.docs/reference/processors/working-memory-processor.md +152 -0
- package/.docs/reference/rag/chunk.md +221 -0
- package/.docs/reference/rag/database-config.md +261 -0
- package/.docs/reference/rag/document.md +114 -0
- package/.docs/reference/rag/embeddings.md +92 -0
- package/.docs/reference/rag/extract-params.md +168 -0
- package/.docs/reference/rag/graph-rag.md +111 -0
- package/.docs/reference/rag/metadata-filters.md +216 -0
- package/.docs/reference/rag/rerank.md +75 -0
- package/.docs/reference/rag/rerankWithScorer.md +80 -0
- package/.docs/reference/server/create-route.md +262 -0
- package/.docs/reference/server/express-adapter.md +176 -0
- package/.docs/reference/server/fastify-adapter.md +90 -0
- package/.docs/reference/server/hono-adapter.md +162 -0
- package/.docs/reference/server/koa-adapter.md +127 -0
- package/.docs/reference/server/mastra-server.md +298 -0
- package/.docs/reference/server/register-api-route.md +249 -0
- package/.docs/reference/server/routes.md +306 -0
- package/.docs/reference/storage/cloudflare-d1.md +218 -0
- package/.docs/reference/storage/cloudflare.md +88 -0
- package/.docs/reference/storage/composite.md +235 -0
- package/.docs/reference/storage/convex.md +161 -0
- package/.docs/reference/storage/dynamodb.md +282 -0
- package/.docs/reference/storage/lance.md +131 -0
- package/.docs/reference/storage/libsql.md +135 -0
- package/.docs/reference/storage/mongodb.md +262 -0
- package/.docs/reference/storage/mssql.md +157 -0
- package/.docs/reference/storage/overview.md +121 -0
- package/.docs/reference/storage/postgresql.md +526 -0
- package/.docs/reference/storage/upstash.md +160 -0
- package/.docs/reference/streaming/ChunkType.md +292 -0
- package/.docs/reference/streaming/agents/MastraModelOutput.md +182 -0
- package/.docs/reference/streaming/agents/stream.md +221 -0
- package/.docs/reference/streaming/agents/streamLegacy.md +142 -0
- package/.docs/reference/streaming/workflows/observeStream.md +42 -0
- package/.docs/reference/streaming/workflows/resumeStream.md +61 -0
- package/.docs/reference/streaming/workflows/stream.md +88 -0
- package/.docs/reference/streaming/workflows/timeTravelStream.md +142 -0
- package/.docs/reference/templates/overview.md +194 -0
- package/.docs/reference/tools/create-tool.md +237 -0
- package/.docs/reference/tools/document-chunker-tool.md +89 -0
- package/.docs/reference/tools/graph-rag-tool.md +182 -0
- package/.docs/reference/tools/mcp-client.md +954 -0
- package/.docs/reference/tools/mcp-server.md +1271 -0
- package/.docs/reference/tools/vector-query-tool.md +459 -0
- package/.docs/reference/vectors/astra.md +121 -0
- package/.docs/reference/vectors/chroma.md +264 -0
- package/.docs/reference/vectors/convex.md +300 -0
- package/.docs/reference/vectors/couchbase.md +226 -0
- package/.docs/reference/vectors/duckdb.md +318 -0
- package/.docs/reference/vectors/elasticsearch.md +189 -0
- package/.docs/reference/vectors/lance.md +220 -0
- package/.docs/reference/vectors/libsql.md +305 -0
- package/.docs/reference/vectors/mongodb.md +295 -0
- package/.docs/reference/vectors/opensearch.md +99 -0
- package/.docs/reference/vectors/pg.md +408 -0
- package/.docs/reference/vectors/pinecone.md +168 -0
- package/.docs/reference/vectors/qdrant.md +222 -0
- package/.docs/reference/vectors/s3vectors.md +277 -0
- package/.docs/reference/vectors/turbopuffer.md +157 -0
- package/.docs/reference/vectors/upstash.md +294 -0
- package/.docs/reference/vectors/vectorize.md +147 -0
- package/.docs/reference/voice/azure.md +148 -0
- package/.docs/reference/voice/cloudflare.md +83 -0
- package/.docs/reference/voice/composite-voice.md +121 -0
- package/.docs/reference/voice/deepgram.md +79 -0
- package/.docs/reference/voice/elevenlabs.md +98 -0
- package/.docs/reference/voice/google-gemini-live.md +378 -0
- package/.docs/reference/voice/google.md +228 -0
- package/.docs/reference/voice/mastra-voice.md +311 -0
- package/.docs/reference/voice/murf.md +122 -0
- package/.docs/reference/voice/openai-realtime.md +203 -0
- package/.docs/reference/voice/openai.md +88 -0
- package/.docs/reference/voice/playai.md +80 -0
- package/.docs/reference/voice/sarvam.md +126 -0
- package/.docs/reference/voice/speechify.md +75 -0
- package/.docs/reference/voice/voice.addInstructions.md +55 -0
- package/.docs/reference/voice/voice.addTools.md +67 -0
- package/.docs/reference/voice/voice.answer.md +54 -0
- package/.docs/reference/voice/voice.close.md +51 -0
- package/.docs/reference/voice/voice.connect.md +94 -0
- package/.docs/reference/voice/voice.events.md +37 -0
- package/.docs/reference/voice/voice.getSpeakers.md +129 -0
- package/.docs/reference/voice/voice.listen.md +164 -0
- package/.docs/reference/voice/voice.off.md +54 -0
- package/.docs/reference/voice/voice.on.md +111 -0
- package/.docs/reference/voice/voice.send.md +65 -0
- package/.docs/reference/voice/voice.speak.md +157 -0
- package/.docs/reference/voice/voice.updateConfig.md +60 -0
- package/.docs/reference/workflows/run-methods/cancel.md +86 -0
- package/.docs/reference/workflows/run-methods/restart.md +33 -0
- package/.docs/reference/workflows/run-methods/resume.md +59 -0
- package/.docs/reference/workflows/run-methods/start.md +58 -0
- package/.docs/reference/workflows/run-methods/startAsync.md +67 -0
- package/.docs/reference/workflows/run-methods/timeTravel.md +142 -0
- package/.docs/reference/workflows/run.md +59 -0
- package/.docs/reference/workflows/step.md +119 -0
- package/.docs/reference/workflows/workflow-methods/branch.md +25 -0
- package/.docs/reference/workflows/workflow-methods/commit.md +17 -0
- package/.docs/reference/workflows/workflow-methods/create-run.md +63 -0
- package/.docs/reference/workflows/workflow-methods/dountil.md +25 -0
- package/.docs/reference/workflows/workflow-methods/dowhile.md +25 -0
- package/.docs/reference/workflows/workflow-methods/foreach.md +118 -0
- package/.docs/reference/workflows/workflow-methods/map.md +93 -0
- package/.docs/reference/workflows/workflow-methods/parallel.md +21 -0
- package/.docs/reference/workflows/workflow-methods/sleep.md +35 -0
- package/.docs/reference/workflows/workflow-methods/sleepUntil.md +35 -0
- package/.docs/reference/workflows/workflow-methods/then.md +21 -0
- package/.docs/reference/workflows/workflow.md +157 -0
- package/.docs/reference/workspace/e2b-sandbox.md +289 -0
- package/.docs/reference/workspace/filesystem.md +255 -0
- package/.docs/reference/workspace/gcs-filesystem.md +174 -0
- package/.docs/reference/workspace/local-filesystem.md +343 -0
- package/.docs/reference/workspace/local-sandbox.md +301 -0
- package/.docs/reference/workspace/s3-filesystem.md +175 -0
- package/.docs/reference/workspace/sandbox.md +87 -0
- package/.docs/reference/workspace/workspace-class.md +244 -0
- package/CHANGELOG.md +8 -0
- package/package.json +5 -5
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Tone Consistency Scorer
|
|
2
|
+
|
|
3
|
+
The `createToneScorer()` function evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
|
|
4
|
+
|
|
5
|
+
## Parameters
|
|
6
|
+
|
|
7
|
+
The `createToneScorer()` function does not take any options.
|
|
8
|
+
|
|
9
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
|
+
|
|
11
|
+
## .run() Returns
|
|
12
|
+
|
|
13
|
+
**runId:** (`string`): The id of the run (optional).
|
|
14
|
+
|
|
15
|
+
**analyzeStepResult:** (`object`): Object with tone metrics: { responseSentiment: number, referenceSentiment: number, difference: number } (for comparison mode) OR { avgSentiment: number, sentimentVariance: number } (for stability mode)
|
|
16
|
+
|
|
17
|
+
**score:** (`number`): Tone consistency/stability score (0-1).
|
|
18
|
+
|
|
19
|
+
`.run()` returns a result in the following shape:
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
{
|
|
23
|
+
runId: string,
|
|
24
|
+
analyzeStepResult: {
|
|
25
|
+
responseSentiment?: number,
|
|
26
|
+
referenceSentiment?: number,
|
|
27
|
+
difference?: number,
|
|
28
|
+
avgSentiment?: number,
|
|
29
|
+
sentimentVariance?: number,
|
|
30
|
+
},
|
|
31
|
+
score: number
|
|
32
|
+
}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Scoring Details
|
|
36
|
+
|
|
37
|
+
The scorer evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
|
|
38
|
+
|
|
39
|
+
### Scoring Process
|
|
40
|
+
|
|
41
|
+
1. Analyzes tone patterns:
|
|
42
|
+
|
|
43
|
+
- Extracts sentiment features
|
|
44
|
+
- Computes sentiment scores
|
|
45
|
+
- Measures tone variations
|
|
46
|
+
|
|
47
|
+
2. Calculates mode-specific score:

**Tone Consistency** (input and output):
|
|
48
|
+
|
|
49
|
+
- Compares sentiment between texts
|
|
50
|
+
- Calculates sentiment difference
|
|
51
|
+
- Score = 1 - (sentiment\_difference / max\_difference)

**Tone Stability** (single input):
|
|
52
|
+
- Analyzes sentiment across sentences
|
|
53
|
+
- Calculates sentiment variance
|
|
54
|
+
- Score = 1 - (sentiment\_variance / max\_variance)
|
|
55
|
+
|
|
56
|
+
Final score: `mode_specific_score * scale`
|
|
57
|
+
|
|
58
|
+
### Score interpretation
|
|
59
|
+
|
|
60
|
+
(0 to scale, default 0-1)
|
|
61
|
+
|
|
62
|
+
- 1.0: Perfect tone consistency/stability
|
|
63
|
+
- 0.7-0.9: Strong consistency with minor variations
|
|
64
|
+
- 0.4-0.6: Moderate consistency with noticeable shifts
|
|
65
|
+
- 0.1-0.3: Poor consistency with major tone changes
|
|
66
|
+
- 0.0: No consistency - completely different tones
|
|
67
|
+
|
|
68
|
+
### analyzeStepResult
|
|
69
|
+
|
|
70
|
+
Object with tone metrics:
|
|
71
|
+
|
|
72
|
+
- **responseSentiment**: Sentiment score for the response (comparison mode).
|
|
73
|
+
- **referenceSentiment**: Sentiment score for the input/reference (comparison mode).
|
|
74
|
+
- **difference**: Absolute difference between sentiment scores (comparison mode).
|
|
75
|
+
- **avgSentiment**: Average sentiment across sentences (stability mode).
|
|
76
|
+
- **sentimentVariance**: Variance of sentiment across sentences (stability mode).
|
|
77
|
+
|
|
78
|
+
## Example
|
|
79
|
+
|
|
80
|
+
Evaluate tone consistency between related agent responses:
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
import { runEvals } from '@mastra/core/evals'
|
|
84
|
+
import { createToneScorer } from '@mastra/evals/scorers/prebuilt'
|
|
85
|
+
import { myAgent } from './agent'
|
|
86
|
+
|
|
87
|
+
const scorer = createToneScorer()
|
|
88
|
+
|
|
89
|
+
const result = await runEvals({
|
|
90
|
+
data: [
|
|
91
|
+
{
|
|
92
|
+
input: 'How was your experience with our service?',
|
|
93
|
+
groundTruth: 'The service was excellent and exceeded expectations!',
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
input: 'Tell me about the customer support',
|
|
97
|
+
groundTruth: 'The support team was friendly and very helpful.',
|
|
98
|
+
},
|
|
99
|
+
],
|
|
100
|
+
scorers: [scorer],
|
|
101
|
+
target: myAgent,
|
|
102
|
+
onItemComplete: ({ scorerResults }) => {
|
|
103
|
+
console.log({
|
|
104
|
+
score: scorerResults[scorer.id].score,
|
|
105
|
+
})
|
|
106
|
+
},
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
console.log(result.scores)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
|
|
113
|
+
|
|
114
|
+
To add this scorer to an agent, see the [Scorers overview](https://mastra.ai/docs/evals/overview) guide.
|
|
115
|
+
|
|
116
|
+
## Related
|
|
117
|
+
|
|
118
|
+
- [Content Similarity Scorer](https://mastra.ai/reference/evals/content-similarity)
|
|
119
|
+
- [Toxicity Scorer](https://mastra.ai/reference/evals/toxicity)
|
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
# Tool Call Accuracy Scorers
|
|
2
|
+
|
|
3
|
+
Mastra provides two tool call accuracy scorers for evaluating whether an LLM selects the correct tools from available options:
|
|
4
|
+
|
|
5
|
+
1. **Code-based scorer** - Deterministic evaluation using exact tool matching
|
|
6
|
+
2. **LLM-based scorer** - Semantic evaluation using AI to assess appropriateness
|
|
7
|
+
|
|
8
|
+
## Choosing Between Scorers
|
|
9
|
+
|
|
10
|
+
### Use the Code-Based Scorer When:
|
|
11
|
+
|
|
12
|
+
- You need **deterministic, reproducible** results
|
|
13
|
+
- You want to test **exact tool matching**
|
|
14
|
+
- You need to validate **specific tool sequences**
|
|
15
|
+
- Speed and cost are priorities (no LLM calls)
|
|
16
|
+
- You're running automated tests
|
|
17
|
+
|
|
18
|
+
### Use the LLM-Based Scorer When:
|
|
19
|
+
|
|
20
|
+
- You need **semantic understanding** of appropriateness
|
|
21
|
+
- Tool selection depends on **context and intent**
|
|
22
|
+
- You want to handle **edge cases** like clarification requests
|
|
23
|
+
- You need **explanations** for scoring decisions
|
|
24
|
+
- You're evaluating **production agent behavior**
|
|
25
|
+
|
|
26
|
+
## Code-Based Tool Call Accuracy Scorer
|
|
27
|
+
|
|
28
|
+
The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/prebuilt` provides deterministic binary scoring based on exact tool matching and supports both strict and lenient evaluation modes, as well as tool calling order validation.
|
|
29
|
+
|
|
30
|
+
### Parameters
|
|
31
|
+
|
|
32
|
+
**expectedTool:** (`string`): The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.
|
|
33
|
+
|
|
34
|
+
**strictMode:** (`boolean`): Controls evaluation strictness. For single tool mode: only exact single tool calls accepted. For order checking mode: tools must match exactly with no extra tools allowed.
|
|
35
|
+
|
|
36
|
+
**expectedToolOrder:** (`string[]`): Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.
|
|
37
|
+
|
|
38
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
39
|
+
|
|
40
|
+
### Evaluation Modes
|
|
41
|
+
|
|
42
|
+
The code-based scorer operates in two distinct modes:
|
|
43
|
+
|
|
44
|
+
#### Single Tool Mode
|
|
45
|
+
|
|
46
|
+
When `expectedToolOrder` is not provided, the scorer evaluates single tool selection:
|
|
47
|
+
|
|
48
|
+
- **Standard Mode (strictMode: false)**: Returns `1` if the expected tool is called, regardless of other tools
|
|
49
|
+
- **Strict Mode (strictMode: true)**: Returns `1` only if exactly one tool is called and it matches the expected tool
|
|
50
|
+
|
|
51
|
+
#### Order Checking Mode
|
|
52
|
+
|
|
53
|
+
When `expectedToolOrder` is provided, the scorer validates tool calling sequence:
|
|
54
|
+
|
|
55
|
+
- **Strict Order (strictMode: true)**: Tools must be called in exactly the specified order with no extra tools
|
|
56
|
+
- **Flexible Order (strictMode: false)**: Expected tools must appear in correct relative order (extra tools allowed)
|
|
57
|
+
|
|
58
|
+
## Code-Based Scoring Details
|
|
59
|
+
|
|
60
|
+
- **Binary scores**: Always returns 0 or 1
|
|
61
|
+
- **Deterministic**: Same input always produces same output
|
|
62
|
+
- **Fast**: No external API calls
|
|
63
|
+
|
|
64
|
+
### Code-Based Scorer Options
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
// Standard mode - passes if expected tool is called
|
|
68
|
+
const lenientScorer = createToolCallAccuracyScorerCode({
|
|
69
|
+
expectedTool: 'search-tool',
|
|
70
|
+
strictMode: false,
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
// Strict mode - only passes if exactly one tool is called
|
|
74
|
+
const strictScorer = createToolCallAccuracyScorerCode({
|
|
75
|
+
expectedTool: 'search-tool',
|
|
76
|
+
strictMode: true,
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
// Order checking with strict mode
|
|
80
|
+
const strictOrderScorer = createToolCallAccuracyScorerCode({
|
|
81
|
+
expectedTool: 'step1-tool',
|
|
82
|
+
expectedToolOrder: ['step1-tool', 'step2-tool', 'step3-tool'],
|
|
83
|
+
strictMode: true, // no extra tools allowed
|
|
84
|
+
})
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Code-Based Scorer Results
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
{
|
|
91
|
+
runId: string,
|
|
92
|
+
preprocessStepResult: {
|
|
93
|
+
expectedTool: string,
|
|
94
|
+
actualTools: string[],
|
|
95
|
+
strictMode: boolean,
|
|
96
|
+
expectedToolOrder?: string[],
|
|
97
|
+
hasToolCalls: boolean,
|
|
98
|
+
correctToolCalled: boolean,
|
|
99
|
+
correctOrderCalled: boolean | null,
|
|
100
|
+
toolCallInfos: ToolCallInfo[]
|
|
101
|
+
},
|
|
102
|
+
score: number // Always 0 or 1
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Code-Based Scorer Examples
|
|
107
|
+
|
|
108
|
+
The code-based scorer provides deterministic, binary scoring (0 or 1) based on exact tool matching.
|
|
109
|
+
|
|
110
|
+
### Correct tool selection
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
const scorer = createToolCallAccuracyScorerCode({
|
|
114
|
+
expectedTool: 'weather-tool',
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
// Simulate LLM input and output with tool call
|
|
118
|
+
const inputMessages = [
|
|
119
|
+
createTestMessage({
|
|
120
|
+
content: 'What is the weather like in New York today?',
|
|
121
|
+
role: 'user',
|
|
122
|
+
id: 'input-1',
|
|
123
|
+
}),
|
|
124
|
+
]
|
|
125
|
+
|
|
126
|
+
const output = [
|
|
127
|
+
createTestMessage({
|
|
128
|
+
content: 'Let me check the weather for you.',
|
|
129
|
+
role: 'assistant',
|
|
130
|
+
id: 'output-1',
|
|
131
|
+
toolInvocations: [
|
|
132
|
+
createToolInvocation({
|
|
133
|
+
toolCallId: 'call-123',
|
|
134
|
+
toolName: 'weather-tool',
|
|
135
|
+
args: { location: 'New York' },
|
|
136
|
+
result: { temperature: '72°F', condition: 'sunny' },
|
|
137
|
+
state: 'result',
|
|
138
|
+
}),
|
|
139
|
+
],
|
|
140
|
+
}),
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
const run = createAgentTestRun({ inputMessages, output })
|
|
144
|
+
const result = await scorer.run(run)
|
|
145
|
+
|
|
146
|
+
console.log(result.score) // 1
|
|
147
|
+
console.log(result.preprocessStepResult?.correctToolCalled) // true
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Strict mode evaluation
|
|
151
|
+
|
|
152
|
+
Only passes if exactly one tool is called:
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
const strictScorer = createToolCallAccuracyScorerCode({
|
|
156
|
+
expectedTool: 'weather-tool',
|
|
157
|
+
strictMode: true,
|
|
158
|
+
})
|
|
159
|
+
|
|
160
|
+
// Multiple tools called - fails in strict mode
|
|
161
|
+
const output = [
|
|
162
|
+
createTestMessage({
|
|
163
|
+
content: 'Let me help you with that.',
|
|
164
|
+
role: 'assistant',
|
|
165
|
+
id: 'output-1',
|
|
166
|
+
toolInvocations: [
|
|
167
|
+
createToolInvocation({
|
|
168
|
+
toolCallId: 'call-1',
|
|
169
|
+
toolName: 'search-tool',
|
|
170
|
+
args: {},
|
|
171
|
+
result: {},
|
|
172
|
+
state: 'result',
|
|
173
|
+
}),
|
|
174
|
+
createToolInvocation({
|
|
175
|
+
toolCallId: 'call-2',
|
|
176
|
+
toolName: 'weather-tool',
|
|
177
|
+
args: { location: 'New York' },
|
|
178
|
+
result: { temperature: '20°C' },
|
|
179
|
+
state: 'result',
|
|
180
|
+
}),
|
|
181
|
+
],
|
|
182
|
+
}),
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
const result = await strictScorer.run(createAgentTestRun({ inputMessages, output }))
|
|
186
|
+
console.log(result.score) // 0 - fails because multiple tools were called
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Tool order validation
|
|
190
|
+
|
|
191
|
+
Validates that tools are called in a specific sequence:
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
const orderScorer = createToolCallAccuracyScorerCode({
|
|
195
|
+
expectedTool: 'auth-tool', // ignored when order is specified
|
|
196
|
+
expectedToolOrder: ['auth-tool', 'fetch-tool'],
|
|
197
|
+
strictMode: true, // no extra tools allowed
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
const output = [
|
|
201
|
+
createTestMessage({
|
|
202
|
+
content: 'I will authenticate and fetch the data.',
|
|
203
|
+
role: 'assistant',
|
|
204
|
+
id: 'output-1',
|
|
205
|
+
toolInvocations: [
|
|
206
|
+
createToolInvocation({
|
|
207
|
+
toolCallId: 'call-1',
|
|
208
|
+
toolName: 'auth-tool',
|
|
209
|
+
args: { token: 'abc123' },
|
|
210
|
+
result: { authenticated: true },
|
|
211
|
+
state: 'result',
|
|
212
|
+
}),
|
|
213
|
+
createToolInvocation({
|
|
214
|
+
toolCallId: 'call-2',
|
|
215
|
+
toolName: 'fetch-tool',
|
|
216
|
+
args: { endpoint: '/data' },
|
|
217
|
+
result: { data: ['item1'] },
|
|
218
|
+
state: 'result',
|
|
219
|
+
}),
|
|
220
|
+
],
|
|
221
|
+
}),
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
const result = await orderScorer.run(createAgentTestRun({ inputMessages, output }))
|
|
225
|
+
console.log(result.score) // 1 - correct order
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Flexible order mode
|
|
229
|
+
|
|
230
|
+
Allows extra tools as long as expected tools maintain relative order:
|
|
231
|
+
|
|
232
|
+
```typescript
|
|
233
|
+
const flexibleOrderScorer = createToolCallAccuracyScorerCode({
|
|
234
|
+
expectedTool: 'auth-tool',
|
|
235
|
+
expectedToolOrder: ['auth-tool', 'fetch-tool'],
|
|
236
|
+
strictMode: false, // allows extra tools
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
const output = [
|
|
240
|
+
createTestMessage({
|
|
241
|
+
content: 'Performing comprehensive operation.',
|
|
242
|
+
role: 'assistant',
|
|
243
|
+
id: 'output-1',
|
|
244
|
+
toolInvocations: [
|
|
245
|
+
createToolInvocation({
|
|
246
|
+
toolCallId: 'call-1',
|
|
247
|
+
toolName: 'auth-tool',
|
|
248
|
+
args: { token: 'abc123' },
|
|
249
|
+
result: { authenticated: true },
|
|
250
|
+
state: 'result',
|
|
251
|
+
}),
|
|
252
|
+
createToolInvocation({
|
|
253
|
+
toolCallId: 'call-2',
|
|
254
|
+
toolName: 'log-tool', // Extra tool - OK in flexible mode
|
|
255
|
+
args: { message: 'Starting fetch' },
|
|
256
|
+
result: { logged: true },
|
|
257
|
+
state: 'result',
|
|
258
|
+
}),
|
|
259
|
+
createToolInvocation({
|
|
260
|
+
toolCallId: 'call-3',
|
|
261
|
+
toolName: 'fetch-tool',
|
|
262
|
+
args: { endpoint: '/data' },
|
|
263
|
+
result: { data: ['item1'] },
|
|
264
|
+
state: 'result',
|
|
265
|
+
}),
|
|
266
|
+
],
|
|
267
|
+
}),
|
|
268
|
+
]
|
|
269
|
+
|
|
270
|
+
const result = await flexibleOrderScorer.run(createAgentTestRun({ inputMessages, output }))
|
|
271
|
+
console.log(result.score) // 1 - auth-tool comes before fetch-tool
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## LLM-Based Tool Call Accuracy Scorer
|
|
275
|
+
|
|
276
|
+
The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/prebuilt` uses an LLM to evaluate whether the tools called by an agent are appropriate for the given user request, providing semantic evaluation rather than exact matching.
|
|
277
|
+
|
|
278
|
+
### Parameters
|
|
279
|
+
|
|
280
|
+
**model:** (`MastraModelConfig`): The LLM model to use for evaluating tool appropriateness
|
|
281
|
+
|
|
282
|
+
**availableTools:** (`Array<{name: string, description: string}>`): List of available tools with their descriptions for context
|
|
283
|
+
|
|
284
|
+
### Features
|
|
285
|
+
|
|
286
|
+
The LLM-based scorer provides:
|
|
287
|
+
|
|
288
|
+
- **Semantic Evaluation**: Understands context and user intent
|
|
289
|
+
- **Appropriateness Assessment**: Distinguishes between "helpful" and "appropriate" tools
|
|
290
|
+
- **Clarification Handling**: Recognizes when agents appropriately ask for clarification
|
|
291
|
+
- **Missing Tool Detection**: Identifies tools that should have been called
|
|
292
|
+
- **Reasoning Generation**: Provides explanations for scoring decisions
|
|
293
|
+
|
|
294
|
+
### Evaluation Process
|
|
295
|
+
|
|
296
|
+
1. **Extract Tool Calls**: Identifies tools mentioned in agent output
|
|
297
|
+
2. **Analyze Appropriateness**: Evaluates each tool against user request
|
|
298
|
+
3. **Generate Score**: Calculates score based on appropriate vs total tool calls
|
|
299
|
+
4. **Generate Reasoning**: Provides human-readable explanation
|
|
300
|
+
|
|
301
|
+
## LLM-Based Scoring Details
|
|
302
|
+
|
|
303
|
+
- **Fractional scores**: Returns values between 0.0 and 1.0
|
|
304
|
+
- **Context-aware**: Considers user intent and appropriateness
|
|
305
|
+
- **Explanatory**: Provides reasoning for scores
|
|
306
|
+
|
|
307
|
+
### LLM-Based Scorer Options
|
|
308
|
+
|
|
309
|
+
```typescript
|
|
310
|
+
// Basic configuration
|
|
311
|
+
const basicLLMScorer = createToolCallAccuracyScorerLLM({
|
|
312
|
+
model: 'openai/gpt-5.1',
|
|
313
|
+
availableTools: [
|
|
314
|
+
{ name: 'tool1', description: 'Description 1' },
|
|
315
|
+
{ name: 'tool2', description: 'Description 2' }
|
|
316
|
+
]
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
// With different model
|
|
320
|
+
const customModelScorer = createToolCallAccuracyScorerLLM({
|
|
321
|
+
model: 'openai/gpt-5', // More powerful model for complex evaluations
|
|
322
|
+
availableTools: [...]
|
|
323
|
+
});
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### LLM-Based Scorer Results
|
|
327
|
+
|
|
328
|
+
```typescript
|
|
329
|
+
{
|
|
330
|
+
runId: string,
|
|
331
|
+
score: number, // 0.0 to 1.0
|
|
332
|
+
reason: string, // Human-readable explanation
|
|
333
|
+
analyzeStepResult: {
|
|
334
|
+
evaluations: Array<{
|
|
335
|
+
toolCalled: string,
|
|
336
|
+
wasAppropriate: boolean,
|
|
337
|
+
reasoning: string
|
|
338
|
+
}>,
|
|
339
|
+
missingTools?: string[]
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
## LLM-Based Scorer Examples
|
|
345
|
+
|
|
346
|
+
The LLM-based scorer uses AI to evaluate whether tool selections are appropriate for the user's request.
|
|
347
|
+
|
|
348
|
+
### Basic LLM evaluation
|
|
349
|
+
|
|
350
|
+
```typescript
|
|
351
|
+
const llmScorer = createToolCallAccuracyScorerLLM({
|
|
352
|
+
model: 'openai/gpt-5.1',
|
|
353
|
+
availableTools: [
|
|
354
|
+
{
|
|
355
|
+
name: 'weather-tool',
|
|
356
|
+
description: 'Get current weather information for any location',
|
|
357
|
+
},
|
|
358
|
+
{
|
|
359
|
+
name: 'calendar-tool',
|
|
360
|
+
description: 'Check calendar events and scheduling',
|
|
361
|
+
},
|
|
362
|
+
{
|
|
363
|
+
name: 'search-tool',
|
|
364
|
+
description: 'Search the web for general information',
|
|
365
|
+
},
|
|
366
|
+
],
|
|
367
|
+
})
|
|
368
|
+
|
|
369
|
+
const inputMessages = [
|
|
370
|
+
createTestMessage({
|
|
371
|
+
content: 'What is the weather like in San Francisco today?',
|
|
372
|
+
role: 'user',
|
|
373
|
+
id: 'input-1',
|
|
374
|
+
}),
|
|
375
|
+
]
|
|
376
|
+
|
|
377
|
+
const output = [
|
|
378
|
+
createTestMessage({
|
|
379
|
+
content: 'Let me check the current weather for you.',
|
|
380
|
+
role: 'assistant',
|
|
381
|
+
id: 'output-1',
|
|
382
|
+
toolInvocations: [
|
|
383
|
+
createToolInvocation({
|
|
384
|
+
toolCallId: 'call-123',
|
|
385
|
+
toolName: 'weather-tool',
|
|
386
|
+
args: { location: 'San Francisco', date: 'today' },
|
|
387
|
+
result: { temperature: '68°F', condition: 'foggy' },
|
|
388
|
+
state: 'result',
|
|
389
|
+
}),
|
|
390
|
+
],
|
|
391
|
+
}),
|
|
392
|
+
]
|
|
393
|
+
|
|
394
|
+
const run = createAgentTestRun({ inputMessages, output })
|
|
395
|
+
const result = await llmScorer.run(run)
|
|
396
|
+
|
|
397
|
+
console.log(result.score) // 1.0 - appropriate tool usage
|
|
398
|
+
console.log(result.reason) // "The agent correctly used the weather-tool to address the user's request for weather information."
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
### Handling inappropriate tool usage
|
|
402
|
+
|
|
403
|
+
```typescript
|
|
404
|
+
const inputMessages = [
|
|
405
|
+
createTestMessage({
|
|
406
|
+
content: 'What is the weather in Tokyo?',
|
|
407
|
+
role: 'user',
|
|
408
|
+
id: 'input-1',
|
|
409
|
+
}),
|
|
410
|
+
]
|
|
411
|
+
|
|
412
|
+
const inappropriateOutput = [
|
|
413
|
+
createTestMessage({
|
|
414
|
+
content: 'Let me search for that information.',
|
|
415
|
+
role: 'assistant',
|
|
416
|
+
id: 'output-1',
|
|
417
|
+
toolInvocations: [
|
|
418
|
+
createToolInvocation({
|
|
419
|
+
toolCallId: 'call-456',
|
|
420
|
+
toolName: 'search-tool', // Less appropriate than weather-tool
|
|
421
|
+
args: { query: 'Tokyo weather' },
|
|
422
|
+
result: { results: ['Tokyo weather data...'] },
|
|
423
|
+
state: 'result',
|
|
424
|
+
}),
|
|
425
|
+
],
|
|
426
|
+
}),
|
|
427
|
+
]
|
|
428
|
+
|
|
429
|
+
const run = createAgentTestRun({ inputMessages, output: inappropriateOutput })
|
|
430
|
+
const result = await llmScorer.run(run)
|
|
431
|
+
|
|
432
|
+
console.log(result.score) // 0.5 - partially appropriate
|
|
433
|
+
console.log(result.reason) // "The agent used search-tool when weather-tool would have been more appropriate for a direct weather query."
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
### Evaluating clarification requests
|
|
437
|
+
|
|
438
|
+
The LLM scorer recognizes when agents appropriately ask for clarification:
|
|
439
|
+
|
|
440
|
+
```typescript
|
|
441
|
+
const vagueInput = [
|
|
442
|
+
createTestMessage({
|
|
443
|
+
content: 'I need help with something',
|
|
444
|
+
role: 'user',
|
|
445
|
+
id: 'input-1'
|
|
446
|
+
})
|
|
447
|
+
];
|
|
448
|
+
|
|
449
|
+
const clarificationOutput = [
|
|
450
|
+
createTestMessage({
|
|
451
|
+
content: "I'd be happy to help! Could you please provide more details about what you need assistance with?",
|
|
452
|
+
role: 'assistant',
|
|
453
|
+
id: 'output-1',
|
|
454
|
+
// No tools called - asking for clarification instead
|
|
455
|
+
})
|
|
456
|
+
];
|
|
457
|
+
|
|
458
|
+
const run = createAgentTestRun({
|
|
459
|
+
inputMessages: vagueInput,
|
|
460
|
+
output: clarificationOutput
|
|
461
|
+
});
|
|
462
|
+
const result = await llmScorer.run(run);
|
|
463
|
+
|
|
464
|
+
console.log(result.score); // 1.0 - appropriate to ask for clarification
|
|
465
|
+
console.log(result.reason); // "The agent appropriately asked for clarification rather than calling tools with insufficient information."
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
## Comparing Both Scorers
|
|
469
|
+
|
|
470
|
+
Here's an example using both scorers on the same data:
|
|
471
|
+
|
|
472
|
+
```typescript
|
|
473
|
+
import {
|
|
474
|
+
createToolCallAccuracyScorerCode as createCodeScorer,
|
|
475
|
+
createToolCallAccuracyScorerLLM as createLLMScorer,
|
|
476
|
+
} from '@mastra/evals/scorers/prebuilt'
|
|
477
|
+
|
|
478
|
+
// Setup both scorers
|
|
479
|
+
const codeScorer = createCodeScorer({
|
|
480
|
+
expectedTool: 'weather-tool',
|
|
481
|
+
strictMode: false,
|
|
482
|
+
})
|
|
483
|
+
|
|
484
|
+
const llmScorer = createLLMScorer({
|
|
485
|
+
model: 'openai/gpt-5.1',
|
|
486
|
+
availableTools: [
|
|
487
|
+
{ name: 'weather-tool', description: 'Get weather information' },
|
|
488
|
+
{ name: 'search-tool', description: 'Search the web' },
|
|
489
|
+
],
|
|
490
|
+
})
|
|
491
|
+
|
|
492
|
+
// Test data
|
|
493
|
+
const run = createAgentTestRun({
|
|
494
|
+
inputMessages: [
|
|
495
|
+
createTestMessage({
|
|
496
|
+
content: 'What is the weather?',
|
|
497
|
+
role: 'user',
|
|
498
|
+
id: 'input-1',
|
|
499
|
+
}),
|
|
500
|
+
],
|
|
501
|
+
output: [
|
|
502
|
+
createTestMessage({
|
|
503
|
+
content: 'Let me find that information.',
|
|
504
|
+
role: 'assistant',
|
|
505
|
+
id: 'output-1',
|
|
506
|
+
toolInvocations: [
|
|
507
|
+
createToolInvocation({
|
|
508
|
+
toolCallId: 'call-1',
|
|
509
|
+
toolName: 'search-tool',
|
|
510
|
+
args: { query: 'weather' },
|
|
511
|
+
result: { results: ['weather data'] },
|
|
512
|
+
state: 'result',
|
|
513
|
+
}),
|
|
514
|
+
],
|
|
515
|
+
}),
|
|
516
|
+
],
|
|
517
|
+
})
|
|
518
|
+
|
|
519
|
+
// Run both scorers
|
|
520
|
+
const codeResult = await codeScorer.run(run)
|
|
521
|
+
const llmResult = await llmScorer.run(run)
|
|
522
|
+
|
|
523
|
+
console.log('Code Scorer:', codeResult.score) // 0 - wrong tool
|
|
524
|
+
console.log('LLM Scorer:', llmResult.score) // 0.3 - partially appropriate
|
|
525
|
+
console.log('LLM Reason:', llmResult.reason) // Explains why search-tool is less appropriate
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
## Related
|
|
529
|
+
|
|
530
|
+
- [Answer Relevancy Scorer](https://mastra.ai/reference/evals/answer-relevancy)
|
|
531
|
+
- [Completeness Scorer](https://mastra.ai/reference/evals/completeness)
|
|
532
|
+
- [Faithfulness Scorer](https://mastra.ai/reference/evals/faithfulness)
|
|
533
|
+
- [Custom Scorers](https://mastra.ai/docs/evals/custom-scorers)
|