@mastra/mcp-docs-server 1.1.9-alpha.0 → 1.1.9-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/adding-voice.md +4 -4
- package/.docs/docs/agents/agent-approval.md +3 -3
- package/.docs/docs/agents/agent-memory.md +1 -1
- package/.docs/docs/agents/network-approval.md +1 -1
- package/.docs/docs/agents/networks.md +1 -1
- package/.docs/docs/agents/overview.md +1 -1
- package/.docs/docs/agents/processors.md +8 -8
- package/.docs/docs/agents/structured-output.md +1 -1
- package/.docs/docs/agents/supervisor-agents.md +3 -3
- package/.docs/docs/agents/using-tools.md +1 -1
- package/.docs/docs/build-with-ai/mcp-docs-server.md +1 -1
- package/.docs/docs/build-with-ai/skills.md +1 -1
- package/.docs/docs/community/contributing-templates.md +1 -1
- package/.docs/docs/community/discord.md +2 -2
- package/.docs/docs/community/licensing.md +1 -1
- package/.docs/docs/deployment/cloud-providers.md +2 -2
- package/.docs/docs/deployment/mastra-server.md +1 -1
- package/.docs/docs/deployment/monorepo.md +1 -1
- package/.docs/docs/deployment/overview.md +3 -3
- package/.docs/docs/deployment/studio.md +1 -1
- package/.docs/docs/deployment/web-framework.md +1 -1
- package/.docs/docs/deployment/workflow-runners.md +1 -1
- package/.docs/docs/evals/built-in-scorers.md +1 -1
- package/.docs/docs/evals/custom-scorers.md +6 -6
- package/.docs/docs/evals/overview.md +1 -1
- package/.docs/docs/evals/running-in-ci.md +6 -6
- package/.docs/docs/getting-started/build-with-ai.md +2 -2
- package/.docs/docs/getting-started/manual-install.md +1 -1
- package/.docs/docs/getting-started/project-structure.md +1 -1
- package/.docs/docs/index.md +1 -1
- package/.docs/docs/mcp/overview.md +1 -1
- package/.docs/docs/mcp/publishing-mcp-server.md +3 -3
- package/.docs/docs/memory/memory-processors.md +8 -8
- package/.docs/docs/memory/message-history.md +2 -2
- package/.docs/docs/memory/observational-memory.md +5 -5
- package/.docs/docs/memory/semantic-recall.md +7 -7
- package/.docs/docs/memory/working-memory.md +14 -14
- package/.docs/docs/observability/datasets/overview.md +1 -1
- package/.docs/docs/observability/datasets/running-experiments.md +1 -1
- package/.docs/docs/observability/logging.md +1 -1
- package/.docs/docs/observability/overview.md +5 -5
- package/.docs/docs/observability/tracing/bridges/otel.md +7 -7
- package/.docs/docs/observability/tracing/exporters/arize.md +3 -3
- package/.docs/docs/observability/tracing/exporters/braintrust.md +1 -1
- package/.docs/docs/observability/tracing/exporters/cloud.md +2 -2
- package/.docs/docs/observability/tracing/exporters/datadog.md +3 -3
- package/.docs/docs/observability/tracing/exporters/default.md +7 -7
- package/.docs/docs/observability/tracing/exporters/laminar.md +1 -1
- package/.docs/docs/observability/tracing/exporters/langfuse.md +3 -3
- package/.docs/docs/observability/tracing/exporters/langsmith.md +4 -4
- package/.docs/docs/observability/tracing/exporters/otel.md +8 -8
- package/.docs/docs/observability/tracing/exporters/posthog.md +2 -2
- package/.docs/docs/observability/tracing/exporters/sentry.md +4 -4
- package/.docs/docs/observability/tracing/overview.md +20 -20
- package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +11 -11
- package/.docs/docs/rag/chunking-and-embedding.md +4 -4
- package/.docs/docs/rag/overview.md +2 -2
- package/.docs/docs/rag/retrieval.md +4 -4
- package/.docs/docs/rag/vector-databases.md +11 -11
- package/.docs/docs/server/auth/auth0.md +1 -1
- package/.docs/docs/server/auth/clerk.md +1 -1
- package/.docs/docs/server/auth/composite-auth.md +9 -9
- package/.docs/docs/server/auth/custom-auth-provider.md +12 -12
- package/.docs/docs/server/auth/firebase.md +2 -2
- package/.docs/docs/server/auth/jwt.md +1 -1
- package/.docs/docs/server/auth/simple-auth.md +8 -8
- package/.docs/docs/server/auth/supabase.md +1 -1
- package/.docs/docs/server/auth/workos.md +1 -1
- package/.docs/docs/server/auth.md +1 -1
- package/.docs/docs/server/custom-adapters.md +7 -7
- package/.docs/docs/server/custom-api-routes.md +2 -2
- package/.docs/docs/server/mastra-client.md +1 -1
- package/.docs/docs/server/mastra-server.md +1 -1
- package/.docs/docs/server/request-context.md +2 -2
- package/.docs/docs/server/server-adapters.md +1 -1
- package/.docs/docs/streaming/events.md +1 -1
- package/.docs/docs/streaming/overview.md +1 -1
- package/.docs/docs/streaming/tool-streaming.md +2 -2
- package/.docs/docs/voice/overview.md +3 -3
- package/.docs/docs/voice/speech-to-speech.md +1 -1
- package/.docs/docs/voice/speech-to-text.md +2 -2
- package/.docs/docs/voice/text-to-speech.md +2 -2
- package/.docs/docs/workflows/agents-and-tools.md +1 -1
- package/.docs/docs/workflows/control-flow.md +1 -1
- package/.docs/docs/workflows/error-handling.md +3 -3
- package/.docs/docs/workflows/suspend-and-resume.md +1 -1
- package/.docs/docs/workflows/time-travel.md +1 -1
- package/.docs/docs/workflows/workflow-state.md +1 -1
- package/.docs/docs/workspace/filesystem.md +1 -1
- package/.docs/docs/workspace/overview.md +1 -1
- package/.docs/docs/workspace/search.md +1 -1
- package/.docs/docs/workspace/skills.md +2 -2
- package/.docs/guides/build-your-ui/ai-sdk-ui.md +2 -2
- package/.docs/guides/build-your-ui/assistant-ui.md +1 -1
- package/.docs/guides/build-your-ui/copilotkit.md +1 -1
- package/.docs/guides/deployment/digital-ocean.md +1 -1
- package/.docs/guides/getting-started/astro.md +1 -1
- package/.docs/guides/getting-started/electron.md +1 -1
- package/.docs/guides/getting-started/next-js.md +1 -1
- package/.docs/guides/getting-started/vite-react.md +1 -1
- package/.docs/guides/guide/ai-recruiter.md +3 -3
- package/.docs/guides/guide/chef-michel.md +4 -4
- package/.docs/guides/guide/code-review-bot.md +3 -3
- package/.docs/guides/guide/dev-assistant.md +5 -5
- package/.docs/guides/guide/docs-manager.md +3 -3
- package/.docs/guides/guide/github-actions-pr-description.md +2 -2
- package/.docs/guides/guide/notes-mcp-server.md +3 -3
- package/.docs/guides/guide/research-assistant.md +4 -4
- package/.docs/guides/guide/research-coordinator.md +1 -1
- package/.docs/guides/guide/stock-agent.md +4 -4
- package/.docs/guides/guide/web-search.md +2 -2
- package/.docs/guides/guide/whatsapp-chat-bot.md +1 -1
- package/.docs/guides/migrations/ai-sdk-v4-to-v5.md +3 -3
- package/.docs/guides/migrations/network-to-supervisor.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/agent.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/deployment.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/evals.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/mastra.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/overview.md +3 -3
- package/.docs/guides/migrations/upgrade-to-v1/storage.md +3 -3
- package/.docs/guides/migrations/upgrade-to-v1/tracing.md +2 -2
- package/.docs/guides/migrations/upgrade-to-v1/vectors.md +3 -3
- package/.docs/guides/migrations/upgrade-to-v1/voice.md +1 -1
- package/.docs/guides/migrations/upgrade-to-v1/workflows.md +1 -1
- package/.docs/guides/migrations/vnext-to-standard-apis.md +1 -1
- package/.docs/models/embeddings.md +4 -4
- package/.docs/models/gateways/custom-gateways.md +4 -4
- package/.docs/models/gateways/netlify.md +1 -1
- package/.docs/models/gateways/openrouter.md +1 -1
- package/.docs/models/gateways/vercel.md +9 -2
- package/.docs/models/gateways.md +2 -2
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/302ai.md +3 -3
- package/.docs/models/providers/abacus.md +3 -3
- package/.docs/models/providers/aihubmix.md +3 -3
- package/.docs/models/providers/alibaba-cn.md +3 -3
- package/.docs/models/providers/alibaba-coding-plan-cn.md +3 -3
- package/.docs/models/providers/alibaba-coding-plan.md +3 -3
- package/.docs/models/providers/alibaba.md +3 -3
- package/.docs/models/providers/anthropic.md +4 -4
- package/.docs/models/providers/bailing.md +3 -3
- package/.docs/models/providers/baseten.md +3 -3
- package/.docs/models/providers/berget.md +3 -3
- package/.docs/models/providers/cerebras.md +4 -4
- package/.docs/models/providers/chutes.md +6 -5
- package/.docs/models/providers/clarifai.md +3 -3
- package/.docs/models/providers/cloudferro-sherlock.md +3 -3
- package/.docs/models/providers/cloudflare-workers-ai.md +3 -3
- package/.docs/models/providers/cortecs.md +3 -3
- package/.docs/models/providers/deepinfra.md +4 -4
- package/.docs/models/providers/deepseek.md +3 -3
- package/.docs/models/providers/drun.md +3 -3
- package/.docs/models/providers/evroc.md +3 -3
- package/.docs/models/providers/fastrouter.md +3 -3
- package/.docs/models/providers/fireworks-ai.md +3 -3
- package/.docs/models/providers/firmware.md +3 -3
- package/.docs/models/providers/friendli.md +3 -3
- package/.docs/models/providers/github-models.md +3 -3
- package/.docs/models/providers/google.md +4 -4
- package/.docs/models/providers/groq.md +4 -4
- package/.docs/models/providers/helicone.md +3 -3
- package/.docs/models/providers/huggingface.md +3 -3
- package/.docs/models/providers/iflowcn.md +3 -3
- package/.docs/models/providers/inception.md +3 -3
- package/.docs/models/providers/inference.md +3 -3
- package/.docs/models/providers/io-net.md +3 -3
- package/.docs/models/providers/jiekou.md +3 -3
- package/.docs/models/providers/kilo.md +3 -3
- package/.docs/models/providers/kimi-for-coding.md +4 -4
- package/.docs/models/providers/kuae-cloud-coding-plan.md +3 -3
- package/.docs/models/providers/llama.md +3 -3
- package/.docs/models/providers/lmstudio.md +3 -3
- package/.docs/models/providers/lucidquery.md +3 -3
- package/.docs/models/providers/meganova.md +3 -3
- package/.docs/models/providers/minimax-cn-coding-plan.md +4 -4
- package/.docs/models/providers/minimax-cn.md +4 -4
- package/.docs/models/providers/minimax-coding-plan.md +4 -4
- package/.docs/models/providers/minimax.md +4 -4
- package/.docs/models/providers/mistral.md +4 -4
- package/.docs/models/providers/moark.md +3 -3
- package/.docs/models/providers/modelscope.md +3 -3
- package/.docs/models/providers/moonshotai-cn.md +3 -3
- package/.docs/models/providers/moonshotai.md +3 -3
- package/.docs/models/providers/morph.md +3 -3
- package/.docs/models/providers/nano-gpt.md +25 -23
- package/.docs/models/providers/nebius.md +3 -3
- package/.docs/models/providers/nova.md +3 -3
- package/.docs/models/providers/novita-ai.md +3 -3
- package/.docs/models/providers/nvidia.md +3 -3
- package/.docs/models/providers/ollama-cloud.md +3 -3
- package/.docs/models/providers/openai.md +4 -4
- package/.docs/models/providers/opencode-go.md +3 -3
- package/.docs/models/providers/opencode.md +3 -3
- package/.docs/models/providers/ovhcloud.md +3 -3
- package/.docs/models/providers/perplexity-agent.md +4 -4
- package/.docs/models/providers/perplexity.md +4 -4
- package/.docs/models/providers/poe.md +3 -3
- package/.docs/models/providers/privatemode-ai.md +3 -3
- package/.docs/models/providers/qihang-ai.md +3 -3
- package/.docs/models/providers/qiniu-ai.md +3 -3
- package/.docs/models/providers/requesty.md +3 -3
- package/.docs/models/providers/scaleway.md +3 -3
- package/.docs/models/providers/siliconflow-cn.md +3 -3
- package/.docs/models/providers/siliconflow.md +3 -3
- package/.docs/models/providers/stackit.md +3 -3
- package/.docs/models/providers/stepfun.md +3 -3
- package/.docs/models/providers/submodel.md +3 -3
- package/.docs/models/providers/synthetic.md +3 -3
- package/.docs/models/providers/togetherai.md +4 -4
- package/.docs/models/providers/upstage.md +3 -3
- package/.docs/models/providers/vivgrid.md +4 -4
- package/.docs/models/providers/vultr.md +3 -3
- package/.docs/models/providers/wandb.md +3 -3
- package/.docs/models/providers/xai.md +4 -4
- package/.docs/models/providers/xiaomi.md +3 -3
- package/.docs/models/providers/zai-coding-plan.md +3 -3
- package/.docs/models/providers/zai.md +3 -3
- package/.docs/models/providers/zenmux.md +4 -4
- package/.docs/models/providers/zhipuai-coding-plan.md +3 -3
- package/.docs/models/providers/zhipuai.md +3 -3
- package/.docs/reference/agents/agent.md +3 -3
- package/.docs/reference/agents/generateLegacy.md +1 -1
- package/.docs/reference/agents/network.md +2 -2
- package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +1 -1
- package/.docs/reference/auth/auth0.md +4 -4
- package/.docs/reference/auth/better-auth.md +2 -2
- package/.docs/reference/auth/clerk.md +1 -1
- package/.docs/reference/auth/firebase.md +4 -4
- package/.docs/reference/auth/jwt.md +1 -1
- package/.docs/reference/auth/supabase.md +1 -1
- package/.docs/reference/auth/workos.md +4 -4
- package/.docs/reference/cli/mastra.md +1 -1
- package/.docs/reference/client-js/agents.md +22 -22
- package/.docs/reference/client-js/error-handling.md +2 -2
- package/.docs/reference/client-js/logs.md +2 -2
- package/.docs/reference/client-js/mastra-client.md +1 -1
- package/.docs/reference/client-js/memory.md +6 -6
- package/.docs/reference/client-js/observability.md +4 -4
- package/.docs/reference/client-js/telemetry.md +1 -1
- package/.docs/reference/client-js/tools.md +3 -3
- package/.docs/reference/client-js/vectors.md +2 -2
- package/.docs/reference/client-js/workflows.md +12 -12
- package/.docs/reference/core/getGatewayById.md +1 -1
- package/.docs/reference/core/getMCPServer.md +2 -2
- package/.docs/reference/core/getMCPServerById.md +2 -2
- package/.docs/reference/core/getMemory.md +1 -1
- package/.docs/reference/core/getScorer.md +2 -2
- package/.docs/reference/core/getScorerById.md +2 -2
- package/.docs/reference/core/getStoredAgentById.md +2 -2
- package/.docs/reference/core/listMCPServers.md +2 -2
- package/.docs/reference/core/listMemory.md +1 -1
- package/.docs/reference/core/listScorers.md +1 -1
- package/.docs/reference/core/listStoredAgents.md +2 -2
- package/.docs/reference/core/mastra-class.md +1 -1
- package/.docs/reference/core/mastra-model-gateway.md +11 -11
- package/.docs/reference/datasets/dataset.md +1 -1
- package/.docs/reference/deployer.md +4 -4
- package/.docs/reference/evals/answer-relevancy.md +3 -3
- package/.docs/reference/evals/answer-similarity.md +3 -3
- package/.docs/reference/evals/bias.md +4 -4
- package/.docs/reference/evals/completeness.md +5 -5
- package/.docs/reference/evals/content-similarity.md +3 -3
- package/.docs/reference/evals/context-precision.md +6 -6
- package/.docs/reference/evals/context-relevance.md +6 -6
- package/.docs/reference/evals/create-scorer.md +7 -7
- package/.docs/reference/evals/faithfulness.md +3 -3
- package/.docs/reference/evals/hallucination.md +5 -5
- package/.docs/reference/evals/keyword-coverage.md +3 -3
- package/.docs/reference/evals/mastra-scorer.md +6 -6
- package/.docs/reference/evals/noise-sensitivity.md +9 -9
- package/.docs/reference/evals/prompt-alignment.md +5 -5
- package/.docs/reference/evals/run-evals.md +5 -5
- package/.docs/reference/evals/scorer-utils.md +17 -17
- package/.docs/reference/evals/textual-difference.md +3 -3
- package/.docs/reference/evals/tone-consistency.md +4 -4
- package/.docs/reference/evals/tool-call-accuracy.md +9 -9
- package/.docs/reference/evals/toxicity.md +3 -3
- package/.docs/reference/harness/harness-class.md +1 -1
- package/.docs/reference/memory/clone-utilities.md +7 -7
- package/.docs/reference/memory/cloneThread.md +4 -4
- package/.docs/reference/memory/createThread.md +1 -1
- package/.docs/reference/memory/deleteMessages.md +1 -1
- package/.docs/reference/memory/getThreadById.md +1 -1
- package/.docs/reference/memory/listThreads.md +3 -3
- package/.docs/reference/memory/memory-class.md +1 -1
- package/.docs/reference/memory/observational-memory.md +1 -1
- package/.docs/reference/memory/recall.md +1 -1
- package/.docs/reference/observability/tracing/bridges/otel.md +5 -5
- package/.docs/reference/observability/tracing/configuration.md +17 -17
- package/.docs/reference/observability/tracing/exporters/arize.md +4 -4
- package/.docs/reference/observability/tracing/exporters/braintrust.md +3 -3
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +6 -6
- package/.docs/reference/observability/tracing/exporters/console-exporter.md +4 -4
- package/.docs/reference/observability/tracing/exporters/datadog.md +4 -4
- package/.docs/reference/observability/tracing/exporters/default-exporter.md +6 -6
- package/.docs/reference/observability/tracing/exporters/laminar.md +2 -2
- package/.docs/reference/observability/tracing/exporters/langfuse.md +4 -4
- package/.docs/reference/observability/tracing/exporters/langsmith.md +6 -6
- package/.docs/reference/observability/tracing/exporters/otel.md +12 -12
- package/.docs/reference/observability/tracing/exporters/posthog.md +3 -3
- package/.docs/reference/observability/tracing/exporters/sentry.md +5 -5
- package/.docs/reference/observability/tracing/instances.md +9 -9
- package/.docs/reference/observability/tracing/interfaces.md +39 -39
- package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +5 -5
- package/.docs/reference/observability/tracing/spans.md +13 -13
- package/.docs/reference/processors/processor-interface.md +15 -15
- package/.docs/reference/rag/chunk.md +2 -2
- package/.docs/reference/rag/database-config.md +8 -8
- package/.docs/reference/rag/document.md +11 -11
- package/.docs/reference/rag/embeddings.md +5 -5
- package/.docs/reference/rag/extract-params.md +8 -8
- package/.docs/reference/rag/graph-rag.md +4 -4
- package/.docs/reference/rag/metadata-filters.md +5 -5
- package/.docs/reference/rag/rerank.md +2 -2
- package/.docs/reference/rag/rerankWithScorer.md +2 -2
- package/.docs/reference/server/express-adapter.md +1 -1
- package/.docs/reference/server/fastify-adapter.md +1 -1
- package/.docs/reference/server/hono-adapter.md +1 -1
- package/.docs/reference/server/koa-adapter.md +1 -1
- package/.docs/reference/server/mastra-server.md +16 -16
- package/.docs/reference/server/register-api-route.md +5 -5
- package/.docs/reference/server/routes.md +1 -1
- package/.docs/reference/storage/cloudflare-d1.md +2 -2
- package/.docs/reference/storage/cloudflare.md +2 -2
- package/.docs/reference/storage/composite.md +1 -1
- package/.docs/reference/storage/convex.md +5 -5
- package/.docs/reference/storage/dynamodb.md +5 -5
- package/.docs/reference/storage/lance.md +3 -3
- package/.docs/reference/storage/libsql.md +1 -1
- package/.docs/reference/storage/mongodb.md +5 -5
- package/.docs/reference/storage/mssql.md +3 -3
- package/.docs/reference/storage/overview.md +2 -2
- package/.docs/reference/storage/postgresql.md +5 -5
- package/.docs/reference/storage/upstash.md +3 -3
- package/.docs/reference/streaming/ChunkType.md +13 -13
- package/.docs/reference/streaming/agents/MastraModelOutput.md +6 -6
- package/.docs/reference/streaming/agents/stream.md +2 -2
- package/.docs/reference/streaming/agents/streamLegacy.md +1 -1
- package/.docs/reference/streaming/workflows/observeStream.md +1 -1
- package/.docs/reference/streaming/workflows/resumeStream.md +1 -1
- package/.docs/reference/streaming/workflows/stream.md +1 -1
- package/.docs/reference/templates/overview.md +3 -3
- package/.docs/reference/tools/create-tool.md +9 -9
- package/.docs/reference/tools/document-chunker-tool.md +4 -4
- package/.docs/reference/tools/graph-rag-tool.md +7 -7
- package/.docs/reference/tools/mcp-client.md +13 -13
- package/.docs/reference/tools/mcp-server.md +23 -23
- package/.docs/reference/tools/vector-query-tool.md +12 -12
- package/.docs/reference/vectors/astra.md +13 -13
- package/.docs/reference/vectors/chroma.md +16 -16
- package/.docs/reference/vectors/convex.md +15 -15
- package/.docs/reference/vectors/couchbase.md +15 -15
- package/.docs/reference/vectors/duckdb.md +17 -17
- package/.docs/reference/vectors/elasticsearch.md +14 -14
- package/.docs/reference/vectors/lance.md +22 -22
- package/.docs/reference/vectors/libsql.md +15 -15
- package/.docs/reference/vectors/mongodb.md +18 -18
- package/.docs/reference/vectors/opensearch.md +11 -11
- package/.docs/reference/vectors/pg.md +21 -21
- package/.docs/reference/vectors/pinecone.md +15 -15
- package/.docs/reference/vectors/qdrant.md +15 -15
- package/.docs/reference/vectors/s3vectors.md +17 -17
- package/.docs/reference/vectors/turbopuffer.md +14 -14
- package/.docs/reference/vectors/upstash.md +15 -15
- package/.docs/reference/vectors/vectorize.md +16 -16
- package/.docs/reference/voice/azure.md +8 -8
- package/.docs/reference/voice/cloudflare.md +5 -5
- package/.docs/reference/voice/composite-voice.md +5 -5
- package/.docs/reference/voice/deepgram.md +5 -5
- package/.docs/reference/voice/elevenlabs.md +6 -6
- package/.docs/reference/voice/google-gemini-live.md +20 -20
- package/.docs/reference/voice/google.md +9 -9
- package/.docs/reference/voice/mastra-voice.md +17 -17
- package/.docs/reference/voice/murf.md +6 -6
- package/.docs/reference/voice/openai-realtime.md +16 -16
- package/.docs/reference/voice/openai.md +5 -5
- package/.docs/reference/voice/playai.md +5 -5
- package/.docs/reference/voice/sarvam.md +5 -5
- package/.docs/reference/voice/speechify.md +5 -5
- package/.docs/reference/voice/voice.addInstructions.md +2 -2
- package/.docs/reference/voice/voice.addTools.md +2 -2
- package/.docs/reference/voice/voice.answer.md +2 -2
- package/.docs/reference/voice/voice.close.md +2 -2
- package/.docs/reference/voice/voice.connect.md +5 -5
- package/.docs/reference/voice/voice.events.md +2 -2
- package/.docs/reference/voice/voice.getSpeakers.md +3 -3
- package/.docs/reference/voice/voice.listen.md +6 -6
- package/.docs/reference/voice/voice.off.md +2 -2
- package/.docs/reference/voice/voice.on.md +3 -3
- package/.docs/reference/voice/voice.send.md +2 -2
- package/.docs/reference/voice/voice.speak.md +5 -5
- package/.docs/reference/voice/voice.updateConfig.md +3 -3
- package/.docs/reference/workflows/run-methods/startAsync.md +1 -1
- package/.docs/reference/workflows/run.md +3 -3
- package/.docs/reference/workflows/step.md +2 -2
- package/.docs/reference/workflows/workflow-methods/create-run.md +1 -1
- package/.docs/reference/workflows/workflow.md +1 -1
- package/.docs/reference/workspace/daytona-sandbox.md +2 -2
- package/.docs/reference/workspace/e2b-sandbox.md +2 -2
- package/.docs/reference/workspace/filesystem.md +1 -1
- package/.docs/reference/workspace/gcs-filesystem.md +1 -1
- package/.docs/reference/workspace/local-filesystem.md +1 -1
- package/.docs/reference/workspace/local-sandbox.md +4 -4
- package/.docs/reference/workspace/process-manager.md +2 -2
- package/.docs/reference/workspace/s3-filesystem.md +1 -1
- package/.docs/reference/workspace/workspace-class.md +2 -2
- package/CHANGELOG.md +14 -0
- package/package.json +4 -4
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Mastra provides a unified `createScorer` factory that allows you to define custom scorers for evaluating input/output pairs. You can use either native JavaScript functions or LLM-based prompt objects for each evaluation step. Custom scorers can be added to Agents and Workflow steps.
|
|
4
4
|
|
|
5
|
-
## How to
|
|
5
|
+
## How to create a custom scorer
|
|
6
6
|
|
|
7
7
|
Use the `createScorer` factory to define your scorer with a name, description, and optional judge configuration. Then chain step methods to build your evaluation pipeline. You must provide at least a `generateScore` step.
|
|
8
8
|
|
|
@@ -35,7 +35,7 @@ const scorer = createScorer({
|
|
|
35
35
|
})
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
## createScorer
|
|
38
|
+
## `createScorer` options
|
|
39
39
|
|
|
40
40
|
**id** (`string`): Unique identifier for the scorer. Used as the name if `name` is not provided.
|
|
41
41
|
|
|
@@ -57,7 +57,7 @@ The judge only runs for steps defined as **prompt objects** (`preprocess`, `anal
|
|
|
57
57
|
|
|
58
58
|
When a prompt-object step runs, its structured LLM output is stored in the corresponding result field (`preprocessStepResult`, `analyzeStepResult`, or the value consumed by `calculateScore` in `generateScore`).
|
|
59
59
|
|
|
60
|
-
## Type
|
|
60
|
+
## Type safety
|
|
61
61
|
|
|
62
62
|
You can specify input/output types when creating scorers for better type inference and IntelliSense support:
|
|
63
63
|
|
|
@@ -113,7 +113,7 @@ const customScorer = createScorer<CustomInput, CustomOutput>({
|
|
|
113
113
|
|
|
114
114
|
Using these types provides autocomplete, compile-time validation, and better documentation for your scoring logic.
|
|
115
115
|
|
|
116
|
-
## Trace
|
|
116
|
+
## Trace scoring with agent types
|
|
117
117
|
|
|
118
118
|
When you use `type: 'agent'`, your scorer can be both added directly to agents and used to score traces from agent interactions. The scorer automatically transforms trace data into the proper agent input/output format:
|
|
119
119
|
|
|
@@ -139,7 +139,7 @@ const mastra = new Mastra({
|
|
|
139
139
|
})
|
|
140
140
|
```
|
|
141
141
|
|
|
142
|
-
## Step
|
|
142
|
+
## Step method signatures
|
|
143
143
|
|
|
144
144
|
### preprocess
|
|
145
145
|
|
|
@@ -199,7 +199,7 @@ The method can return any value. The returned value will be available to subsequ
|
|
|
199
199
|
|
|
200
200
|
**judge** (`object`): (Optional) LLM judge for this step (can override main judge). See Judge Object section.
|
|
201
201
|
|
|
202
|
-
### generateScore
|
|
202
|
+
### `generateScore`
|
|
203
203
|
|
|
204
204
|
**Required** step that computes the final numerical score.
|
|
205
205
|
|
|
@@ -234,7 +234,7 @@ When using prompt object mode, you must also provide a `calculateScore` function
|
|
|
234
234
|
|
|
235
235
|
**calculateScore** (`function`): Function: ({ run, results, analyzeStepResult }) => number. Converts the LLM's structured output into a numerical score.
|
|
236
236
|
|
|
237
|
-
### generateReason
|
|
237
|
+
### `generateReason`
|
|
238
238
|
|
|
239
239
|
Optional step that provides an explanation for the score.
|
|
240
240
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Faithfulness
|
|
1
|
+
# Faithfulness scorer
|
|
2
2
|
|
|
3
3
|
The `createFaithfulnessScorer()` function evaluates how factually accurate an LLM's output is compared to the provided context. It extracts claims from the output and verifies them against the context, making it essential for measuring the reliability of RAG pipeline responses.
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ The `createFaithfulnessScorer()` function accepts a single options object with t
|
|
|
14
14
|
|
|
15
15
|
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
16
16
|
|
|
17
|
-
##
|
|
17
|
+
## `.run()` returns
|
|
18
18
|
|
|
19
19
|
**runId** (`string`): The id of the run (optional).
|
|
20
20
|
|
|
@@ -32,7 +32,7 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
32
32
|
|
|
33
33
|
**generateReasonPrompt** (`string`): The prompt sent to the LLM for the generateReason step (optional).
|
|
34
34
|
|
|
35
|
-
## Scoring
|
|
35
|
+
## Scoring details
|
|
36
36
|
|
|
37
37
|
The scorer evaluates faithfulness through claim verification against provided context.
|
|
38
38
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Hallucination
|
|
1
|
+
# Hallucination scorer
|
|
2
2
|
|
|
3
3
|
The `createHallucinationScorer()` function evaluates whether an LLM generates factually correct information by comparing its output against the provided context. This scorer measures hallucination by identifying direct contradictions between the context and the output.
|
|
4
4
|
|
|
@@ -18,7 +18,7 @@ The `createHallucinationScorer()` function accepts a single options object with
|
|
|
18
18
|
|
|
19
19
|
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
20
20
|
|
|
21
|
-
##
|
|
21
|
+
## `.run()` returns
|
|
22
22
|
|
|
23
23
|
**runId** (`string`): The id of the run (optional).
|
|
24
24
|
|
|
@@ -36,7 +36,7 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
36
36
|
|
|
37
37
|
**generateReasonPrompt** (`string`): The prompt sent to the LLM for the generateReason step (optional).
|
|
38
38
|
|
|
39
|
-
## Scoring
|
|
39
|
+
## Scoring details
|
|
40
40
|
|
|
41
41
|
The scorer evaluates hallucination through contradiction detection and unsupported claim analysis.
|
|
42
42
|
|
|
@@ -115,7 +115,7 @@ const scorer = createHallucinationScorer({
|
|
|
115
115
|
})
|
|
116
116
|
```
|
|
117
117
|
|
|
118
|
-
### Dynamic Context with getContext
|
|
118
|
+
### Dynamic Context with `getContext`
|
|
119
119
|
|
|
120
120
|
Use `getContext` for live scoring scenarios where context comes from tool results:
|
|
121
121
|
|
|
@@ -164,7 +164,7 @@ const agent = new Agent({
|
|
|
164
164
|
})
|
|
165
165
|
```
|
|
166
166
|
|
|
167
|
-
### Batch Evaluation with runEvals
|
|
167
|
+
### Batch Evaluation with `runEvals`
|
|
168
168
|
|
|
169
169
|
```typescript
|
|
170
170
|
import { runEvals } from '@mastra/core/evals'
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Keyword
|
|
1
|
+
# Keyword coverage scorer
|
|
2
2
|
|
|
3
3
|
The `createKeywordCoverageScorer()` function evaluates how well an LLM's output covers the important keywords from the input. It analyzes keyword presence and matches while ignoring common words and stop words.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ The `createKeywordCoverageScorer()` function doesn't take any options.
|
|
|
8
8
|
|
|
9
9
|
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## `.run()` returns
|
|
12
12
|
|
|
13
13
|
**runId** (`string`): The id of the run (optional).
|
|
14
14
|
|
|
@@ -35,7 +35,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
35
35
|
}
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
## Scoring
|
|
38
|
+
## Scoring details
|
|
39
39
|
|
|
40
40
|
The scorer evaluates keyword coverage by matching keywords with the following features:
|
|
41
41
|
|
|
@@ -4,7 +4,7 @@ The `MastraScorer` class is the base class for all scorers in Mastra. It provide
|
|
|
4
4
|
|
|
5
5
|
**Note:** Most users should use [`createScorer`](https://mastra.ai/reference/evals/create-scorer) to create scorer instances. Direct instantiation of `MastraScorer` isn't recommended.
|
|
6
6
|
|
|
7
|
-
## How to
|
|
7
|
+
## How to get a `MastraScorer` instance
|
|
8
8
|
|
|
9
9
|
Use the `createScorer` factory function, which returns a `MastraScorer` instance:
|
|
10
10
|
|
|
@@ -22,7 +22,7 @@ const scorer = createScorer({
|
|
|
22
22
|
// scorer is now a MastraScorer instance
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
##
|
|
25
|
+
## `.run()` method
|
|
26
26
|
|
|
27
27
|
The `.run()` method is the primary way to execute your scorer and evaluate input/output pairs. It processes the data through your defined steps (preprocess → analyze → generateScore → generateReason) and returns a comprehensive result object with the score, reasoning, and intermediate results.
|
|
28
28
|
|
|
@@ -37,7 +37,7 @@ const result = await scorer.run({
|
|
|
37
37
|
})
|
|
38
38
|
```
|
|
39
39
|
|
|
40
|
-
##
|
|
40
|
+
## `.run()` input
|
|
41
41
|
|
|
42
42
|
**input** (`any`): Input data to be evaluated. Can be any type depending on your scorer's requirements.
|
|
43
43
|
|
|
@@ -49,7 +49,7 @@ const result = await scorer.run({
|
|
|
49
49
|
|
|
50
50
|
**groundTruth** (`any`): Optional expected or reference output for comparison during scoring. Automatically passed when using runEvals.
|
|
51
51
|
|
|
52
|
-
##
|
|
52
|
+
## `.run()` returns
|
|
53
53
|
|
|
54
54
|
**runId** (`string`): The unique identifier for this scoring run.
|
|
55
55
|
|
|
@@ -69,7 +69,7 @@ const result = await scorer.run({
|
|
|
69
69
|
|
|
70
70
|
**generateReasonPrompt** (`string`): Generate reason prompt, if defined (optional).
|
|
71
71
|
|
|
72
|
-
## Step
|
|
72
|
+
## Step execution flow
|
|
73
73
|
|
|
74
74
|
When you call `.run()`, the MastraScorer executes the defined steps in this order:
|
|
75
75
|
|
|
@@ -80,7 +80,7 @@ When you call `.run()`, the MastraScorer executes the defined steps in this orde
|
|
|
80
80
|
|
|
81
81
|
Each step receives the results from previous steps, allowing you to build complex evaluation pipelines.
|
|
82
82
|
|
|
83
|
-
## Usage
|
|
83
|
+
## Usage example
|
|
84
84
|
|
|
85
85
|
```typescript
|
|
86
86
|
const scorer = createScorer({
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Noise
|
|
1
|
+
# Noise sensitivity scorer
|
|
2
2
|
|
|
3
3
|
The `createNoiseSensitivityScorerLLM()` function creates a **CI/testing scorer** that evaluates how robust an agent is when exposed to irrelevant, distracting, or misleading information. Unlike live scorers that evaluate single production runs, this scorer requires predetermined test data including both baseline responses and noisy variations.
|
|
4
4
|
|
|
@@ -17,7 +17,7 @@ Before using the noise sensitivity scorer, prepare your test data:
|
|
|
17
17
|
|
|
18
18
|
**options** (`NoiseSensitivityOptions`): Configuration options for the scorer
|
|
19
19
|
|
|
20
|
-
## CI/
|
|
20
|
+
## CI/testing requirements
|
|
21
21
|
|
|
22
22
|
This scorer is designed exclusively for CI/testing environments and has specific requirements:
|
|
23
23
|
|
|
@@ -80,13 +80,13 @@ describe('Agent Noise Resistance Tests', () => {
|
|
|
80
80
|
})
|
|
81
81
|
```
|
|
82
82
|
|
|
83
|
-
##
|
|
83
|
+
## `.run()` returns
|
|
84
84
|
|
|
85
85
|
**score** (`number`): Robustness score between 0 and 1 (1.0 = completely robust, 0.0 = severely compromised)
|
|
86
86
|
|
|
87
87
|
**reason** (`string`): Human-readable explanation of how noise affected the agent's response
|
|
88
88
|
|
|
89
|
-
## Evaluation
|
|
89
|
+
## Evaluation dimensions
|
|
90
90
|
|
|
91
91
|
The Noise Sensitivity scorer analyzes five key dimensions:
|
|
92
92
|
|
|
@@ -110,7 +110,7 @@ Compares how similar the responses are in their core message and conclusions. Ev
|
|
|
110
110
|
|
|
111
111
|
Checks if noise causes the agent to generate false or fabricated information that wasn't present in either the query or the noise.
|
|
112
112
|
|
|
113
|
-
## Scoring
|
|
113
|
+
## Scoring algorithm
|
|
114
114
|
|
|
115
115
|
### Formula
|
|
116
116
|
|
|
@@ -138,7 +138,7 @@ Each dimension receives an impact level with corresponding weights:
|
|
|
138
138
|
|
|
139
139
|
When the LLM's direct score and the calculated score diverge by more than the discrepancy threshold, the scorer uses the lower (more conservative) score to ensure reliable evaluation.
|
|
140
140
|
|
|
141
|
-
## Noise
|
|
141
|
+
## Noise types
|
|
142
142
|
|
|
143
143
|
### Misinformation
|
|
144
144
|
|
|
@@ -158,7 +158,7 @@ Deliberately conflicting instructions designed to confuse.
|
|
|
158
158
|
|
|
159
159
|
Example: "Write a summary of this article. Actually, ignore that and tell me about dogs instead."
|
|
160
160
|
|
|
161
|
-
## CI/
|
|
161
|
+
## CI/testing usage patterns
|
|
162
162
|
|
|
163
163
|
### Integration Testing
|
|
164
164
|
|
|
@@ -468,7 +468,7 @@ const strictScorer = createNoiseSensitivityScorerLLM({
|
|
|
468
468
|
})
|
|
469
469
|
```
|
|
470
470
|
|
|
471
|
-
## CI
|
|
471
|
+
## CI test suite: Testing different noise types
|
|
472
472
|
|
|
473
473
|
Create comprehensive test suites to evaluate agent performance across various noise categories in your CI pipeline:
|
|
474
474
|
|
|
@@ -537,7 +537,7 @@ async function evaluateNoiseResistance(testCases) {
|
|
|
537
537
|
}
|
|
538
538
|
```
|
|
539
539
|
|
|
540
|
-
## CI
|
|
540
|
+
## CI pipeline: Batch evaluation for model comparison
|
|
541
541
|
|
|
542
542
|
Use in your CI pipeline to compare noise resistance across different models before deployment:
|
|
543
543
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Prompt
|
|
1
|
+
# Prompt alignment scorer
|
|
2
2
|
|
|
3
3
|
The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates how well agent responses align with user prompts across multiple dimensions: intent understanding, requirement fulfillment, response completeness, and format appropriateness.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
8
8
|
|
|
9
9
|
**options** (`PromptAlignmentOptions`): Configuration options for the scorer
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## `.run()` returns
|
|
12
12
|
|
|
13
13
|
**score** (`number`): Multi-dimensional alignment score between 0 and scale (default 0-1)
|
|
14
14
|
|
|
@@ -52,7 +52,7 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
52
52
|
}
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
-
## Scoring
|
|
55
|
+
## Scoring details
|
|
56
56
|
|
|
57
57
|
### Scorer configuration
|
|
58
58
|
|
|
@@ -163,7 +163,7 @@ Final Score = Weighted Score × scale
|
|
|
163
163
|
- Production monitoring where both user and system requirements matter
|
|
164
164
|
- Holistic assessment of prompt-response alignment
|
|
165
165
|
|
|
166
|
-
## Common
|
|
166
|
+
## Common use cases
|
|
167
167
|
|
|
168
168
|
### Code Generation Evaluation
|
|
169
169
|
|
|
@@ -597,7 +597,7 @@ const result = await scorer.run({
|
|
|
597
597
|
// High score for both addressing the user's request AND following system guidelines
|
|
598
598
|
```
|
|
599
599
|
|
|
600
|
-
## Comparison with
|
|
600
|
+
## Comparison with other scorers
|
|
601
601
|
|
|
602
602
|
| Aspect | Prompt Alignment | Answer Relevancy | Faithfulness |
|
|
603
603
|
| -------------- | ------------------------------------------ | ---------------------------- | -------------------------------- |
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The `runEvals` function enables batch evaluation of agents and workflows by running multiple test cases against scorers concurrently. This is essential for systematic testing, performance analysis, and validation of AI systems.
|
|
4
4
|
|
|
5
|
-
## Usage
|
|
5
|
+
## Usage example
|
|
6
6
|
|
|
7
7
|
```typescript
|
|
8
8
|
import { runEvals } from '@mastra/core/evals'
|
|
@@ -43,7 +43,7 @@ console.log(`Processed ${result.summary.totalItems} items`)
|
|
|
43
43
|
|
|
44
44
|
**onItemComplete** (`function`): Callback function called after each test case completes. Receives item, target result, and scorer results.
|
|
45
45
|
|
|
46
|
-
## Data
|
|
46
|
+
## Data item structure
|
|
47
47
|
|
|
48
48
|
**input** (`string | string[] | CoreMessage[] | any`): Input data for the target. For agents: messages or strings. For workflows: workflow input data.
|
|
49
49
|
|
|
@@ -55,7 +55,7 @@ console.log(`Processed ${result.summary.totalItems} items`)
|
|
|
55
55
|
|
|
56
56
|
**startOptions** (`WorkflowRunOptions`): Per-item workflow run options (e.g. initialState, perStep, outputOptions). Merged on top of targetOptions, so per-item values take precedence. Only applicable when the target is a workflow.
|
|
57
57
|
|
|
58
|
-
## Workflow
|
|
58
|
+
## Workflow scorer configuration
|
|
59
59
|
|
|
60
60
|
For workflows, you can specify scorers at different levels using `WorkflowScorerConfig`:
|
|
61
61
|
|
|
@@ -105,7 +105,7 @@ const result = await runEvals({
|
|
|
105
105
|
})
|
|
106
106
|
```
|
|
107
107
|
|
|
108
|
-
### Agent with targetOptions
|
|
108
|
+
### Agent with `targetOptions`
|
|
109
109
|
|
|
110
110
|
Pass execution options like `maxSteps` or `modelSettings` to customize agent behavior during evaluation:
|
|
111
111
|
|
|
@@ -149,7 +149,7 @@ const workflowResult = await runEvals({
|
|
|
149
149
|
})
|
|
150
150
|
```
|
|
151
151
|
|
|
152
|
-
### Workflow with per-item startOptions
|
|
152
|
+
### Workflow with per-item `startOptions`
|
|
153
153
|
|
|
154
154
|
Use `startOptions` on individual data items to customize each workflow run. Per-item values take precedence over `targetOptions`:
|
|
155
155
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Scorer
|
|
1
|
+
# Scorer utils
|
|
2
2
|
|
|
3
3
|
Mastra provides utility functions to help extract and process data from scorer run inputs and outputs. These utilities are particularly useful in the `preprocess` step of custom scorers.
|
|
4
4
|
|
|
@@ -17,9 +17,9 @@ import {
|
|
|
17
17
|
} from '@mastra/evals/scorers/utils'
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
-
## Message
|
|
20
|
+
## Message extraction
|
|
21
21
|
|
|
22
|
-
### getAssistantMessageFromRunOutput
|
|
22
|
+
### `getAssistantMessageFromRunOutput`
|
|
23
23
|
|
|
24
24
|
Extracts the text content from the first assistant message in the run output.
|
|
25
25
|
|
|
@@ -42,7 +42,7 @@ const scorer = createScorer({
|
|
|
42
42
|
|
|
43
43
|
**Returns:** `string | undefined` - The assistant message text, or undefined if no assistant message is found.
|
|
44
44
|
|
|
45
|
-
### getUserMessageFromRunInput
|
|
45
|
+
### `getUserMessageFromRunInput`
|
|
46
46
|
|
|
47
47
|
Extracts the text content from the first user message in the run input.
|
|
48
48
|
|
|
@@ -57,7 +57,7 @@ Extracts the text content from the first user message in the run input.
|
|
|
57
57
|
|
|
58
58
|
**Returns:** `string | undefined` - The user message text, or undefined if no user message is found.
|
|
59
59
|
|
|
60
|
-
### extractInputMessages
|
|
60
|
+
### `extractInputMessages`
|
|
61
61
|
|
|
62
62
|
Extracts text content from all input messages as an array.
|
|
63
63
|
|
|
@@ -70,7 +70,7 @@ Extracts text content from all input messages as an array.
|
|
|
70
70
|
|
|
71
71
|
**Returns:** `string[]` - Array of text strings from each input message.
|
|
72
72
|
|
|
73
|
-
### extractAgentResponseMessages
|
|
73
|
+
### `extractAgentResponseMessages`
|
|
74
74
|
|
|
75
75
|
Extracts text content from all assistant response messages as an array.
|
|
76
76
|
|
|
@@ -83,9 +83,9 @@ Extracts text content from all assistant response messages as an array.
|
|
|
83
83
|
|
|
84
84
|
**Returns:** `string[]` - Array of text strings from each assistant message.
|
|
85
85
|
|
|
86
|
-
## Reasoning
|
|
86
|
+
## Reasoning extraction
|
|
87
87
|
|
|
88
|
-
### getReasoningFromRunOutput
|
|
88
|
+
### `getReasoningFromRunOutput`
|
|
89
89
|
|
|
90
90
|
Extracts reasoning text from the run output. This is particularly useful when evaluating responses from reasoning models like `deepseek-reasoner` that produce chain-of-thought reasoning.
|
|
91
91
|
|
|
@@ -138,9 +138,9 @@ const reasoningQualityScorer = createScorer({
|
|
|
138
138
|
|
|
139
139
|
**Returns:** `string | undefined` - The reasoning text, or undefined if no reasoning is present.
|
|
140
140
|
|
|
141
|
-
## System
|
|
141
|
+
## System message extraction
|
|
142
142
|
|
|
143
|
-
### getSystemMessagesFromRunInput
|
|
143
|
+
### `getSystemMessagesFromRunInput`
|
|
144
144
|
|
|
145
145
|
Extracts all system messages from the run input, including both standard system messages and tagged system messages (specialized prompts like memory instructions).
|
|
146
146
|
|
|
@@ -156,7 +156,7 @@ Extracts all system messages from the run input, including both standard system
|
|
|
156
156
|
|
|
157
157
|
**Returns:** `string[]` - Array of system message strings.
|
|
158
158
|
|
|
159
|
-
### getCombinedSystemPrompt
|
|
159
|
+
### `getCombinedSystemPrompt`
|
|
160
160
|
|
|
161
161
|
Combines all system messages into a single prompt string, joined with double newlines.
|
|
162
162
|
|
|
@@ -169,9 +169,9 @@ Combines all system messages into a single prompt string, joined with double new
|
|
|
169
169
|
|
|
170
170
|
**Returns:** `string` - Combined system prompt string.
|
|
171
171
|
|
|
172
|
-
## Tool
|
|
172
|
+
## Tool call extraction
|
|
173
173
|
|
|
174
|
-
### extractToolCalls
|
|
174
|
+
### `extractToolCalls`
|
|
175
175
|
|
|
176
176
|
Extracts information about all tool calls from the run output, including tool names, call IDs, and their positions in the message array.
|
|
177
177
|
|
|
@@ -216,11 +216,11 @@ type ToolCallInfo = {
|
|
|
216
216
|
}
|
|
217
217
|
```
|
|
218
218
|
|
|
219
|
-
## Test
|
|
219
|
+
## Test utilities
|
|
220
220
|
|
|
221
221
|
These utilities help create test data for scorer development.
|
|
222
222
|
|
|
223
|
-
### createTestMessage
|
|
223
|
+
### `createTestMessage`
|
|
224
224
|
|
|
225
225
|
Creates a `MastraDBMessage` object for testing purposes.
|
|
226
226
|
|
|
@@ -247,7 +247,7 @@ const assistantMessage = createTestMessage({
|
|
|
247
247
|
})
|
|
248
248
|
```
|
|
249
249
|
|
|
250
|
-
### createAgentTestRun
|
|
250
|
+
### `createAgentTestRun`
|
|
251
251
|
|
|
252
252
|
Creates a complete test run object for testing scorers.
|
|
253
253
|
|
|
@@ -266,7 +266,7 @@ const result = await myScorer.run({
|
|
|
266
266
|
})
|
|
267
267
|
```
|
|
268
268
|
|
|
269
|
-
## Complete
|
|
269
|
+
## Complete example
|
|
270
270
|
|
|
271
271
|
Here's a complete example showing how to use multiple utilities together:
|
|
272
272
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Textual
|
|
1
|
+
# Textual difference scorer
|
|
2
2
|
|
|
3
3
|
The `createTextualDifferenceScorer()` function uses sequence matching to measure the textual differences between two strings. It provides detailed information about changes, including the number of operations needed to transform one text into another.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ The `createTextualDifferenceScorer()` function doesn't take any options.
|
|
|
8
8
|
|
|
9
9
|
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## `.run()` returns
|
|
12
12
|
|
|
13
13
|
**runId** (`string`): The id of the run (optional).
|
|
14
14
|
|
|
@@ -31,7 +31,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
31
31
|
}
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
-
## Scoring
|
|
34
|
+
## Scoring details
|
|
35
35
|
|
|
36
36
|
The scorer calculates several measures:
|
|
37
37
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Tone
|
|
1
|
+
# Tone consistency scorer
|
|
2
2
|
|
|
3
3
|
The `createToneScorer()` function evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ The `createToneScorer()` function doesn't take any options.
|
|
|
8
8
|
|
|
9
9
|
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## `.run()` returns
|
|
12
12
|
|
|
13
13
|
**runId** (`string`): The id of the run (optional).
|
|
14
14
|
|
|
@@ -32,7 +32,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
32
32
|
}
|
|
33
33
|
```
|
|
34
34
|
|
|
35
|
-
## Scoring
|
|
35
|
+
## Scoring details
|
|
36
36
|
|
|
37
37
|
The scorer evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
|
|
38
38
|
|
|
@@ -65,7 +65,7 @@ Final score: `mode_specific_score * scale`
|
|
|
65
65
|
- 0.1-0.3: Poor consistency with major tone changes
|
|
66
66
|
- 0.0: No consistency - completely different tones
|
|
67
67
|
|
|
68
|
-
### analyzeStepResult
|
|
68
|
+
### `analyzeStepResult`
|
|
69
69
|
|
|
70
70
|
Object with tone metrics:
|
|
71
71
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# Tool
|
|
1
|
+
# Tool call accuracy scorers
|
|
2
2
|
|
|
3
3
|
Mastra provides two tool call accuracy scorers for evaluating whether an LLM selects the correct tools from available options:
|
|
4
4
|
|
|
5
5
|
1. **Code-based scorer** - Deterministic evaluation using exact tool matching
|
|
6
6
|
2. **LLM-based scorer** - Semantic evaluation using AI to assess appropriateness
|
|
7
7
|
|
|
8
|
-
## Choosing
|
|
8
|
+
## Choosing between scorers
|
|
9
9
|
|
|
10
10
|
### Use the Code-Based Scorer When:
|
|
11
11
|
|
|
@@ -23,7 +23,7 @@ Mastra provides two tool call accuracy scorers for evaluating whether an LLM sel
|
|
|
23
23
|
- You need **explanations** for scoring decisions
|
|
24
24
|
- You're evaluating **production agent behavior**
|
|
25
25
|
|
|
26
|
-
## Code-
|
|
26
|
+
## Code-based tool call accuracy scorer
|
|
27
27
|
|
|
28
28
|
The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/prebuilt` provides deterministic binary scoring based on exact tool matching and supports both strict and lenient evaluation modes, as well as tool calling order validation.
|
|
29
29
|
|
|
@@ -55,7 +55,7 @@ When `expectedToolOrder` is provided, the scorer validates tool calling sequence
|
|
|
55
55
|
- **Strict Order (strictMode: true)**: Tools must be called in exactly the specified order with no extra tools
|
|
56
56
|
- **Flexible Order (strictMode: false)**: Expected tools must appear in correct relative order (extra tools allowed)
|
|
57
57
|
|
|
58
|
-
## Code-
|
|
58
|
+
## Code-based scoring details
|
|
59
59
|
|
|
60
60
|
- **Binary scores**: Always returns 0 or 1
|
|
61
61
|
- **Deterministic**: Same input always produces same output
|
|
@@ -103,7 +103,7 @@ const strictOrderScorer = createCodeScorer({
|
|
|
103
103
|
}
|
|
104
104
|
```
|
|
105
105
|
|
|
106
|
-
## Code-
|
|
106
|
+
## Code-based scorer examples
|
|
107
107
|
|
|
108
108
|
The code-based scorer provides deterministic, binary scoring (0 or 1) based on exact tool matching.
|
|
109
109
|
|
|
@@ -271,7 +271,7 @@ const result = await flexibleOrderScorer.run(run)
|
|
|
271
271
|
console.log(result.score) // 1 - auth-tool comes before fetch-tool
|
|
272
272
|
```
|
|
273
273
|
|
|
274
|
-
## LLM-
|
|
274
|
+
## LLM-based tool call accuracy scorer
|
|
275
275
|
|
|
276
276
|
The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/prebuilt` uses an LLM to evaluate whether the tools called by an agent are appropriate for the given user request, providing semantic evaluation rather than exact matching.
|
|
277
277
|
|
|
@@ -298,7 +298,7 @@ The LLM-based scorer provides:
|
|
|
298
298
|
3. **Generate Score**: Calculates score based on appropriate vs total tool calls
|
|
299
299
|
4. **Generate Reasoning**: Provides human-readable explanation
|
|
300
300
|
|
|
301
|
-
## LLM-
|
|
301
|
+
## LLM-based scoring details
|
|
302
302
|
|
|
303
303
|
- **Fractional scores**: Returns values between 0.0 and 1.0
|
|
304
304
|
- **Context-aware**: Considers user intent and appropriateness
|
|
@@ -341,7 +341,7 @@ const customModelScorer = createLLMScorer({
|
|
|
341
341
|
}
|
|
342
342
|
```
|
|
343
343
|
|
|
344
|
-
## LLM-
|
|
344
|
+
## LLM-based scorer examples
|
|
345
345
|
|
|
346
346
|
The LLM-based scorer uses AI to evaluate whether tool selections are appropriate for the user's request.
|
|
347
347
|
|
|
@@ -465,7 +465,7 @@ console.log(result.score); // 1.0 - appropriate to ask for clarification
|
|
|
465
465
|
console.log(result.reason); // "The agent appropriately asked for clarification rather than calling tools with insufficient information."
|
|
466
466
|
```
|
|
467
467
|
|
|
468
|
-
## Comparing
|
|
468
|
+
## Comparing both scorers
|
|
469
469
|
|
|
470
470
|
Here's an example using both scorers on the same data:
|
|
471
471
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Toxicity
|
|
1
|
+
# Toxicity scorer
|
|
2
2
|
|
|
3
3
|
The `createToxicityScorer()` function evaluates whether an LLM's output contains racist, biased, or toxic elements. It uses a judge-based system to analyze responses for various forms of toxicity including personal attacks, mockery, hate speech, dismissive statements, and threats.
|
|
4
4
|
|
|
@@ -12,7 +12,7 @@ The `createToxicityScorer()` function accepts a single options object with the f
|
|
|
12
12
|
|
|
13
13
|
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
14
14
|
|
|
15
|
-
##
|
|
15
|
+
## `.run()` returns
|
|
16
16
|
|
|
17
17
|
**runId** (`string`): The id of the run (optional).
|
|
18
18
|
|
|
@@ -41,7 +41,7 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
41
41
|
}
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-
## Scoring
|
|
44
|
+
## Scoring details
|
|
45
45
|
|
|
46
46
|
The scorer evaluates toxicity through multiple aspects:
|
|
47
47
|
|