@mastra/mcp-docs-server 0.13.37 → 0.13.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +8 -8
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +69 -69
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Freact.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +23 -23
- package/.docs/organized/changelogs/create-mastra.md +5 -5
- package/.docs/organized/changelogs/mastra.md +17 -17
- package/.docs/raw/agents/adding-voice.mdx +20 -9
- package/.docs/raw/agents/agent-memory.mdx +55 -39
- package/.docs/raw/agents/guardrails.mdx +68 -61
- package/.docs/raw/agents/networks.mdx +50 -46
- package/.docs/raw/agents/overview.mdx +125 -88
- package/.docs/raw/agents/using-tools.mdx +14 -15
- package/.docs/raw/auth/auth0.mdx +28 -27
- package/.docs/raw/auth/clerk.mdx +22 -20
- package/.docs/raw/auth/firebase.mdx +42 -39
- package/.docs/raw/auth/index.mdx +1 -1
- package/.docs/raw/auth/jwt.mdx +18 -16
- package/.docs/raw/auth/supabase.mdx +20 -18
- package/.docs/raw/auth/workos.mdx +32 -26
- package/.docs/raw/community/contributing-templates.mdx +7 -7
- package/.docs/raw/community/discord.mdx +2 -2
- package/.docs/raw/community/licensing.mdx +1 -1
- package/.docs/raw/course/03-agent-memory/26-updating-mastra-export-comprehensive.md +0 -32
- package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +41 -22
- package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +48 -29
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +52 -24
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +86 -55
- package/.docs/raw/deployment/cloud-providers/index.mdx +16 -13
- package/.docs/raw/deployment/monorepo.mdx +32 -42
- package/.docs/raw/deployment/overview.mdx +15 -15
- package/.docs/raw/deployment/server-deployment.mdx +18 -23
- package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +16 -17
- package/.docs/raw/deployment/serverless-platforms/index.mdx +15 -12
- package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +14 -23
- package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +16 -23
- package/.docs/raw/deployment/web-framework.mdx +14 -14
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +138 -145
- package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +54 -43
- package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +51 -36
- package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +161 -120
- package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +59 -48
- package/.docs/raw/frameworks/servers/express.mdx +45 -44
- package/.docs/raw/frameworks/web-frameworks/astro.mdx +245 -162
- package/.docs/raw/frameworks/web-frameworks/next-js.mdx +112 -69
- package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +189 -164
- package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +94 -81
- package/.docs/raw/getting-started/installation.mdx +164 -133
- package/.docs/raw/getting-started/mcp-docs-server.mdx +36 -39
- package/.docs/raw/getting-started/project-structure.mdx +34 -42
- package/.docs/raw/getting-started/studio.mdx +40 -58
- package/.docs/raw/getting-started/templates.mdx +22 -27
- package/.docs/raw/index.mdx +9 -9
- package/.docs/raw/mastra-cloud/dashboard.mdx +10 -10
- package/.docs/raw/mastra-cloud/observability.mdx +7 -8
- package/.docs/raw/mastra-cloud/overview.mdx +16 -22
- package/.docs/raw/mastra-cloud/setting-up.mdx +33 -20
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/overview.mdx +21 -23
- package/.docs/raw/memory/semantic-recall.mdx +14 -13
- package/.docs/raw/memory/storage/memory-with-libsql.mdx +27 -28
- package/.docs/raw/memory/storage/memory-with-pg.mdx +26 -26
- package/.docs/raw/memory/storage/memory-with-upstash.mdx +26 -27
- package/.docs/raw/memory/threads-and-resources.mdx +23 -20
- package/.docs/raw/memory/working-memory.mdx +27 -38
- package/.docs/raw/observability/ai-tracing/exporters/arize.mdx +30 -29
- package/.docs/raw/observability/ai-tracing/exporters/braintrust.mdx +8 -9
- package/.docs/raw/observability/ai-tracing/exporters/cloud.mdx +17 -16
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +31 -32
- package/.docs/raw/observability/ai-tracing/exporters/langfuse.mdx +18 -17
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +14 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +56 -46
- package/.docs/raw/observability/ai-tracing/overview.mdx +145 -122
- package/.docs/raw/observability/ai-tracing/processors/sensitive-data-filter.mdx +57 -36
- package/.docs/raw/observability/logging.mdx +14 -17
- package/.docs/raw/observability/nextjs-tracing.mdx +5 -5
- package/.docs/raw/observability/otel-tracing.mdx +17 -18
- package/.docs/raw/observability/overview.mdx +14 -10
- package/.docs/raw/rag/chunking-and-embedding.mdx +10 -8
- package/.docs/raw/rag/overview.mdx +5 -5
- package/.docs/raw/rag/retrieval.mdx +125 -107
- package/.docs/raw/rag/vector-databases.mdx +232 -223
- package/.docs/raw/reference/agents/agent.mdx +54 -41
- package/.docs/raw/reference/agents/generate.mdx +367 -261
- package/.docs/raw/reference/agents/generateLegacy.mdx +260 -178
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDescription.mdx +3 -2
- package/.docs/raw/reference/agents/getInstructions.mdx +7 -5
- package/.docs/raw/reference/agents/getLLM.mdx +11 -7
- package/.docs/raw/reference/agents/getMemory.mdx +7 -5
- package/.docs/raw/reference/agents/getModel.mdx +7 -5
- package/.docs/raw/reference/agents/getScorers.mdx +7 -5
- package/.docs/raw/reference/agents/getTools.mdx +7 -5
- package/.docs/raw/reference/agents/getVoice.mdx +7 -5
- package/.docs/raw/reference/agents/getWorkflows.mdx +7 -5
- package/.docs/raw/reference/agents/listAgents.mdx +6 -4
- package/.docs/raw/reference/agents/network.mdx +171 -116
- package/.docs/raw/reference/auth/auth0.mdx +18 -14
- package/.docs/raw/reference/auth/clerk.mdx +15 -12
- package/.docs/raw/reference/auth/firebase.mdx +23 -16
- package/.docs/raw/reference/auth/jwt.mdx +7 -6
- package/.docs/raw/reference/auth/supabase.mdx +13 -10
- package/.docs/raw/reference/auth/workos.mdx +17 -13
- package/.docs/raw/reference/cli/create-mastra.mdx +61 -44
- package/.docs/raw/reference/cli/mastra.mdx +11 -11
- package/.docs/raw/reference/client-js/agents.mdx +40 -43
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -1
- package/.docs/raw/reference/client-js/logs.mdx +1 -1
- package/.docs/raw/reference/client-js/mastra-client.mdx +21 -13
- package/.docs/raw/reference/client-js/memory.mdx +6 -2
- package/.docs/raw/reference/client-js/observability.mdx +10 -9
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -1
- package/.docs/raw/reference/client-js/tools.mdx +1 -1
- package/.docs/raw/reference/client-js/vectors.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows.mdx +7 -7
- package/.docs/raw/reference/core/getAgent.mdx +6 -5
- package/.docs/raw/reference/core/getAgentById.mdx +5 -4
- package/.docs/raw/reference/core/getAgents.mdx +3 -3
- package/.docs/raw/reference/core/getDeployer.mdx +4 -3
- package/.docs/raw/reference/core/getLogger.mdx +4 -3
- package/.docs/raw/reference/core/getLogs.mdx +10 -6
- package/.docs/raw/reference/core/getLogsByRunId.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServer.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServers.mdx +4 -3
- package/.docs/raw/reference/core/getMemory.mdx +4 -3
- package/.docs/raw/reference/core/getScorer.mdx +15 -12
- package/.docs/raw/reference/core/getScorerByName.mdx +12 -9
- package/.docs/raw/reference/core/getScorers.mdx +3 -2
- package/.docs/raw/reference/core/getServer.mdx +4 -3
- package/.docs/raw/reference/core/getStorage.mdx +4 -3
- package/.docs/raw/reference/core/getTelemetry.mdx +4 -3
- package/.docs/raw/reference/core/getVector.mdx +6 -4
- package/.docs/raw/reference/core/getVectors.mdx +4 -3
- package/.docs/raw/reference/core/getWorkflow.mdx +7 -4
- package/.docs/raw/reference/core/getWorkflows.mdx +5 -3
- package/.docs/raw/reference/core/mastra-class.mdx +16 -16
- package/.docs/raw/reference/core/setLogger.mdx +6 -4
- package/.docs/raw/reference/core/setStorage.mdx +4 -4
- package/.docs/raw/reference/core/setTelemetry.mdx +4 -3
- package/.docs/raw/reference/deployer/cloudflare.mdx +11 -7
- package/.docs/raw/reference/deployer/deployer.mdx +2 -1
- package/.docs/raw/reference/deployer/netlify.mdx +4 -4
- package/.docs/raw/reference/deployer/vercel.mdx +6 -6
- package/.docs/raw/reference/evals/answer-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/bias.mdx +4 -5
- package/.docs/raw/reference/evals/completeness.mdx +4 -5
- package/.docs/raw/reference/evals/content-similarity.mdx +4 -5
- package/.docs/raw/reference/evals/context-position.mdx +4 -5
- package/.docs/raw/reference/evals/context-precision.mdx +4 -5
- package/.docs/raw/reference/evals/context-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/contextual-recall.mdx +4 -5
- package/.docs/raw/reference/evals/faithfulness.mdx +4 -5
- package/.docs/raw/reference/evals/hallucination.mdx +4 -6
- package/.docs/raw/reference/evals/keyword-coverage.mdx +4 -5
- package/.docs/raw/reference/evals/prompt-alignment.mdx +4 -6
- package/.docs/raw/reference/evals/summarization.mdx +4 -6
- package/.docs/raw/reference/evals/textual-difference.mdx +4 -5
- package/.docs/raw/reference/evals/tone-consistency.mdx +4 -7
- package/.docs/raw/reference/evals/toxicity.mdx +4 -5
- package/.docs/raw/reference/index.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/after.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/commit.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/createRun.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/else.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/events.mdx +7 -7
- package/.docs/raw/reference/legacyWorkflows/execute.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/if.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resume.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/start.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-class.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-function.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/step-options.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/suspend.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/then.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/until.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/watch.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/while.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/workflow.mdx +7 -7
- package/.docs/raw/reference/memory/createThread.mdx +10 -10
- package/.docs/raw/reference/memory/deleteMessages.mdx +7 -5
- package/.docs/raw/reference/memory/getThreadById.mdx +6 -5
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +11 -10
- package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +10 -9
- package/.docs/raw/reference/memory/{Memory.mdx → memory-class.mdx} +53 -46
- package/.docs/raw/reference/memory/query.mdx +39 -25
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +9 -5
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +26 -22
- package/.docs/raw/reference/observability/ai-tracing/exporters/arize.mdx +16 -12
- package/.docs/raw/reference/observability/ai-tracing/exporters/braintrust.mdx +14 -14
- package/.docs/raw/reference/observability/ai-tracing/exporters/cloud-exporter.mdx +16 -11
- package/.docs/raw/reference/observability/ai-tracing/exporters/console-exporter.mdx +10 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/default-exporter.mdx +23 -15
- package/.docs/raw/reference/observability/ai-tracing/exporters/langfuse.mdx +6 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +13 -13
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +44 -36
- package/.docs/raw/reference/observability/ai-tracing/interfaces.mdx +115 -106
- package/.docs/raw/reference/observability/ai-tracing/processors/sensitive-data-filter.mdx +37 -32
- package/.docs/raw/reference/observability/ai-tracing/span.mdx +29 -26
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +13 -15
- package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +2 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +12 -14
- package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +1 -1
- package/.docs/raw/reference/processors/batch-parts-processor.mdx +13 -10
- package/.docs/raw/reference/processors/language-detector.mdx +27 -17
- package/.docs/raw/reference/processors/moderation-processor.mdx +26 -17
- package/.docs/raw/reference/processors/pii-detector.mdx +28 -18
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +25 -17
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +26 -17
- package/.docs/raw/reference/processors/token-limiter-processor.mdx +22 -15
- package/.docs/raw/reference/processors/unicode-normalizer.mdx +13 -12
- package/.docs/raw/reference/rag/chunk.mdx +41 -23
- package/.docs/raw/reference/rag/database-config.mdx +56 -38
- package/.docs/raw/reference/rag/document.mdx +1 -1
- package/.docs/raw/reference/rag/embeddings.mdx +1 -1
- package/.docs/raw/reference/rag/extract-params.mdx +1 -1
- package/.docs/raw/reference/rag/graph-rag.mdx +1 -1
- package/.docs/raw/reference/rag/metadata-filters.mdx +23 -26
- package/.docs/raw/reference/rag/rerank.mdx +1 -1
- package/.docs/raw/reference/rag/rerankWithScorer.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +35 -17
- package/.docs/raw/reference/scorers/answer-similarity.mdx +84 -68
- package/.docs/raw/reference/scorers/bias.mdx +22 -19
- package/.docs/raw/reference/scorers/completeness.mdx +21 -16
- package/.docs/raw/reference/scorers/content-similarity.mdx +12 -10
- package/.docs/raw/reference/scorers/context-precision.mdx +73 -64
- package/.docs/raw/reference/scorers/context-relevance.mdx +142 -126
- package/.docs/raw/reference/scorers/create-scorer.mdx +93 -61
- package/.docs/raw/reference/scorers/faithfulness.mdx +21 -13
- package/.docs/raw/reference/scorers/hallucination.mdx +17 -12
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +32 -27
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +39 -33
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +203 -152
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +186 -132
- package/.docs/raw/reference/scorers/run-experiment.mdx +40 -31
- package/.docs/raw/reference/scorers/textual-difference.mdx +25 -26
- package/.docs/raw/reference/scorers/tone-consistency.mdx +29 -26
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +180 -176
- package/.docs/raw/reference/scorers/toxicity.mdx +35 -31
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
- package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
- package/.docs/raw/reference/storage/dynamodb.mdx +1 -1
- package/.docs/raw/reference/storage/lance.mdx +1 -1
- package/.docs/raw/reference/storage/libsql.mdx +2 -2
- package/.docs/raw/reference/storage/mongodb.mdx +4 -5
- package/.docs/raw/reference/storage/mssql.mdx +5 -4
- package/.docs/raw/reference/storage/postgresql.mdx +35 -33
- package/.docs/raw/reference/storage/upstash.mdx +6 -5
- package/.docs/raw/reference/streaming/ChunkType.mdx +788 -314
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +265 -109
- package/.docs/raw/reference/streaming/agents/stream.mdx +375 -266
- package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +233 -162
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +4 -4
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +5 -5
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +24 -20
- package/.docs/raw/reference/streaming/workflows/stream.mdx +35 -26
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +36 -27
- package/.docs/raw/reference/templates/overview.mdx +16 -39
- package/.docs/raw/reference/tools/client.mdx +1 -1
- package/.docs/raw/reference/tools/create-tool.mdx +45 -35
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +2 -2
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +12 -12
- package/.docs/raw/reference/tools/mcp-client.mdx +70 -64
- package/.docs/raw/reference/tools/mcp-server.mdx +91 -78
- package/.docs/raw/reference/tools/vector-query-tool.mdx +48 -38
- package/.docs/raw/reference/vectors/astra.mdx +1 -1
- package/.docs/raw/reference/vectors/chroma.mdx +25 -19
- package/.docs/raw/reference/vectors/couchbase.mdx +4 -4
- package/.docs/raw/reference/vectors/lance.mdx +5 -6
- package/.docs/raw/reference/vectors/libsql.mdx +1 -1
- package/.docs/raw/reference/vectors/mongodb.mdx +1 -1
- package/.docs/raw/reference/vectors/opensearch.mdx +1 -1
- package/.docs/raw/reference/vectors/pg.mdx +8 -4
- package/.docs/raw/reference/vectors/pinecone.mdx +1 -1
- package/.docs/raw/reference/vectors/qdrant.mdx +1 -1
- package/.docs/raw/reference/vectors/s3vectors.mdx +35 -27
- package/.docs/raw/reference/vectors/turbopuffer.mdx +1 -1
- package/.docs/raw/reference/vectors/upstash.mdx +33 -25
- package/.docs/raw/reference/vectors/vectorize.mdx +1 -1
- package/.docs/raw/reference/voice/azure.mdx +1 -1
- package/.docs/raw/reference/voice/cloudflare.mdx +1 -1
- package/.docs/raw/reference/voice/composite-voice.mdx +1 -1
- package/.docs/raw/reference/voice/deepgram.mdx +1 -1
- package/.docs/raw/reference/voice/elevenlabs.mdx +1 -1
- package/.docs/raw/reference/voice/google-gemini-live.mdx +6 -4
- package/.docs/raw/reference/voice/google.mdx +1 -1
- package/.docs/raw/reference/voice/mastra-voice.mdx +1 -1
- package/.docs/raw/reference/voice/murf.mdx +1 -1
- package/.docs/raw/reference/voice/openai-realtime.mdx +1 -1
- package/.docs/raw/reference/voice/openai.mdx +1 -1
- package/.docs/raw/reference/voice/playai.mdx +1 -1
- package/.docs/raw/reference/voice/sarvam.mdx +1 -1
- package/.docs/raw/reference/voice/speechify.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addInstructions.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
- package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
- package/.docs/raw/reference/voice/voice.close.mdx +1 -1
- package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
- package/.docs/raw/reference/voice/voice.events.mdx +1 -1
- package/.docs/raw/reference/voice/voice.getSpeakers.mdx +23 -30
- package/.docs/raw/reference/voice/voice.listen.mdx +1 -1
- package/.docs/raw/reference/voice/voice.off.mdx +1 -1
- package/.docs/raw/reference/voice/voice.on.mdx +1 -1
- package/.docs/raw/reference/voice/voice.send.mdx +1 -1
- package/.docs/raw/reference/voice/voice.speak.mdx +1 -1
- package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
- package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -3
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +49 -34
- package/.docs/raw/reference/workflows/run-methods/start.mdx +43 -31
- package/.docs/raw/reference/workflows/run-methods/watch.mdx +7 -8
- package/.docs/raw/reference/workflows/run.mdx +7 -10
- package/.docs/raw/reference/workflows/step.mdx +15 -12
- package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -7
- package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/map.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +5 -5
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +3 -3
- package/.docs/raw/reference/workflows/workflow.mdx +13 -10
- package/.docs/raw/scorers/custom-scorers.mdx +58 -48
- package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +6 -8
- package/.docs/raw/scorers/evals-old-api/overview.mdx +8 -8
- package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +9 -9
- package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +5 -5
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +3 -1
- package/.docs/raw/scorers/overview.mdx +20 -19
- package/.docs/raw/server-db/custom-api-routes.mdx +8 -8
- package/.docs/raw/server-db/mastra-client.mdx +56 -54
- package/.docs/raw/server-db/middleware.mdx +11 -7
- package/.docs/raw/server-db/production-server.mdx +5 -7
- package/.docs/raw/server-db/runtime-context.mdx +41 -38
- package/.docs/raw/server-db/storage.mdx +82 -87
- package/.docs/raw/streaming/events.mdx +24 -16
- package/.docs/raw/streaming/overview.mdx +17 -18
- package/.docs/raw/streaming/tool-streaming.mdx +9 -10
- package/.docs/raw/streaming/workflow-streaming.mdx +14 -12
- package/.docs/raw/tools-mcp/advanced-usage.mdx +2 -2
- package/.docs/raw/tools-mcp/mcp-overview.mdx +92 -102
- package/.docs/raw/tools-mcp/overview.mdx +9 -14
- package/.docs/raw/voice/overview.mdx +273 -250
- package/.docs/raw/voice/speech-to-speech.mdx +14 -12
- package/.docs/raw/voice/speech-to-text.mdx +2 -2
- package/.docs/raw/voice/text-to-speech.mdx +2 -2
- package/.docs/raw/workflows/agents-and-tools.mdx +29 -28
- package/.docs/raw/workflows/control-flow.mdx +24 -24
- package/.docs/raw/workflows/error-handling.mdx +15 -17
- package/.docs/raw/workflows/human-in-the-loop.mdx +39 -39
- package/.docs/raw/workflows/inngest-workflow.mdx +33 -29
- package/.docs/raw/workflows/input-data-mapping.mdx +9 -9
- package/.docs/raw/workflows/overview.mdx +60 -60
- package/.docs/raw/workflows/snapshots.mdx +54 -36
- package/.docs/raw/workflows/suspend-and-resume.mdx +52 -57
- package/.docs/raw/workflows-legacy/control-flow.mdx +15 -17
- package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +3 -1
- package/.docs/raw/workflows-legacy/error-handling.mdx +8 -6
- package/.docs/raw/workflows-legacy/nested-workflows.mdx +6 -0
- package/.docs/raw/workflows-legacy/overview.mdx +28 -26
- package/.docs/raw/workflows-legacy/runtime-variables.mdx +4 -2
- package/.docs/raw/workflows-legacy/steps.mdx +5 -3
- package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +10 -8
- package/.docs/raw/workflows-legacy/variables.mdx +10 -8
- package/CHANGELOG.md +14 -0
- package/package.json +5 -5
- package/.docs/raw/memory/storage/memory-with-mongodb.mdx +0 -148
|
@@ -9,7 +9,7 @@ The `Workflow` class enables you to create state machines for complex sequences
|
|
|
9
9
|
|
|
10
10
|
## Usage example
|
|
11
11
|
|
|
12
|
-
```typescript
|
|
12
|
+
```typescript title="src/mastra/workflows/test-workflow.ts" showLineNumbers copy
|
|
13
13
|
import { createWorkflow } from "@mastra/core/workflows";
|
|
14
14
|
import { z } from "zod";
|
|
15
15
|
|
|
@@ -20,8 +20,8 @@ export const workflow = createWorkflow({
|
|
|
20
20
|
}),
|
|
21
21
|
outputSchema: z.object({
|
|
22
22
|
value: z.string(),
|
|
23
|
-
})
|
|
24
|
-
})
|
|
23
|
+
}),
|
|
24
|
+
});
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
## Constructor parameters
|
|
@@ -46,7 +46,8 @@ export const workflow = createWorkflow({
|
|
|
46
46
|
{
|
|
47
47
|
name: "stateSchema",
|
|
48
48
|
type: "z.ZodObject<any>",
|
|
49
|
-
description:
|
|
49
|
+
description:
|
|
50
|
+
"Optional Zod schema for the workflow state. Automatically injected when using Mastra's state system. If not specified, type is 'any'.",
|
|
50
51
|
isOptional: true,
|
|
51
52
|
},
|
|
52
53
|
{
|
|
@@ -54,7 +55,7 @@ export const workflow = createWorkflow({
|
|
|
54
55
|
type: "WorkflowOptions",
|
|
55
56
|
description: "Optional options for the workflow",
|
|
56
57
|
isOptional: true,
|
|
57
|
-
}
|
|
58
|
+
},
|
|
58
59
|
]}
|
|
59
60
|
/>
|
|
60
61
|
|
|
@@ -71,14 +72,16 @@ export const workflow = createWorkflow({
|
|
|
71
72
|
{
|
|
72
73
|
name: "validateInputs",
|
|
73
74
|
type: "boolean",
|
|
74
|
-
description:
|
|
75
|
+
description:
|
|
76
|
+
"Optional flag to determine whether to validate the workflow inputs. This also applies default values from zodSchemas on the workflow/step input/resume data. If input/resume data validation fails on start/resume, the workflow will not start/resume, it throws an error instead. If input data validation fails on a step execution, the step fails, causing the workflow to fail and the error is returned.",
|
|
75
77
|
isOptional: true,
|
|
76
78
|
defaultValue: "false",
|
|
77
79
|
},
|
|
78
80
|
{
|
|
79
81
|
name: "shouldPersistSnapshot",
|
|
80
82
|
type: "(params: { stepResults: Record<string, StepResult<any, any, any, any>>; workflowStatus: WorkflowRunStatus }) => boolean",
|
|
81
|
-
description:
|
|
83
|
+
description:
|
|
84
|
+
"Optional flag to determine whether to persist the workflow snapshot",
|
|
82
85
|
isOptional: true,
|
|
83
86
|
defaultValue: "() => true",
|
|
84
87
|
},
|
|
@@ -114,7 +117,7 @@ A workflow's `status` indicates its current execution state. The possible values
|
|
|
114
117
|
|
|
115
118
|
## Extended usage example
|
|
116
119
|
|
|
117
|
-
```typescript
|
|
120
|
+
```typescript title="src/test-run.ts" showLineNumbers copy
|
|
118
121
|
import { mastra } from "./mastra";
|
|
119
122
|
|
|
120
123
|
const run = await mastra.getWorkflow("workflow").createRunAsync();
|
|
@@ -128,5 +131,5 @@ if (result.status === "suspended") {
|
|
|
128
131
|
|
|
129
132
|
## Related
|
|
130
133
|
|
|
131
|
-
- [Step Class](./step
|
|
132
|
-
- [Control flow](
|
|
134
|
+
- [Step Class](./step)
|
|
135
|
+
- [Control flow](/docs/workflows/control-flow)
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
---
|
|
2
|
+
title: "Custom Scorers | Scorers | Mastra Docs"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Custom scorers
|
|
2
6
|
|
|
3
7
|
Mastra provides a unified `createScorer` factory that allows you to build custom evaluation logic using either JavaScript functions or LLM-based prompt objects for each step. This flexibility lets you choose the best approach for each part of your evaluation pipeline.
|
|
4
8
|
|
|
@@ -16,12 +20,14 @@ Each step can use either **functions** or **prompt objects** (LLM-based evaluati
|
|
|
16
20
|
### Functions vs Prompt Objects
|
|
17
21
|
|
|
18
22
|
**Functions** use JavaScript for deterministic logic. They're ideal for:
|
|
23
|
+
|
|
19
24
|
- Algorithmic evaluations with clear criteria
|
|
20
25
|
- Performance-critical scenarios
|
|
21
26
|
- Integration with existing libraries
|
|
22
27
|
- Consistent, reproducible results
|
|
23
28
|
|
|
24
29
|
**Prompt Objects** use LLMs as judges for evaluation. They're perfect for:
|
|
30
|
+
|
|
25
31
|
- Subjective evaluations requiring human-like judgment
|
|
26
32
|
- Complex criteria difficult to code algorithmically
|
|
27
33
|
- Natural language understanding tasks
|
|
@@ -61,9 +67,8 @@ For type safety and compatibility with both live agent scoring and trace scoring
|
|
|
61
67
|
```typescript
|
|
62
68
|
const myScorer = createScorer({
|
|
63
69
|
// ...
|
|
64
|
-
type:
|
|
65
|
-
})
|
|
66
|
-
.generateScore(({ run, results }) => {
|
|
70
|
+
type: "agent", // Automatically handles agent input/output types
|
|
71
|
+
}).generateScore(({ run, results }) => {
|
|
67
72
|
// run.output is automatically typed as ScorerRunOutputForAgent
|
|
68
73
|
// run.input is automatically typed as ScorerRunInputForAgent
|
|
69
74
|
});
|
|
@@ -83,7 +88,7 @@ const glutenCheckerScorer = createScorer(...)
|
|
|
83
88
|
// Extract and clean recipe text
|
|
84
89
|
const recipeText = run.output.text.toLowerCase();
|
|
85
90
|
const wordCount = recipeText.split(' ').length;
|
|
86
|
-
|
|
91
|
+
|
|
87
92
|
return {
|
|
88
93
|
recipeText,
|
|
89
94
|
wordCount,
|
|
@@ -105,7 +110,7 @@ const glutenCheckerScorer = createScorer(...)
|
|
|
105
110
|
createPrompt: ({ run }) => `
|
|
106
111
|
Extract all ingredients and cooking methods from this recipe:
|
|
107
112
|
${run.output.text}
|
|
108
|
-
|
|
113
|
+
|
|
109
114
|
Return JSON with ingredients and cookingMethods arrays.
|
|
110
115
|
`
|
|
111
116
|
})
|
|
@@ -124,13 +129,13 @@ const glutenCheckerScorer = createScorer({...})
|
|
|
124
129
|
.preprocess(...)
|
|
125
130
|
.analyze(({ run, results }) => {
|
|
126
131
|
const { recipeText, hasCommonGlutenWords } = results.preprocessStepResult;
|
|
127
|
-
|
|
132
|
+
|
|
128
133
|
// Simple gluten detection algorithm
|
|
129
134
|
const glutenKeywords = ['wheat', 'flour', 'barley', 'rye', 'bread'];
|
|
130
|
-
const foundGlutenWords = glutenKeywords.filter(word =>
|
|
135
|
+
const foundGlutenWords = glutenKeywords.filter(word =>
|
|
131
136
|
recipeText.includes(word)
|
|
132
137
|
);
|
|
133
|
-
|
|
138
|
+
|
|
134
139
|
return {
|
|
135
140
|
isGlutenFree: foundGlutenWords.length === 0,
|
|
136
141
|
detectedGlutenSources: foundGlutenWords,
|
|
@@ -154,7 +159,7 @@ const glutenCheckerScorer = createScorer({...})
|
|
|
154
159
|
createPrompt: ({ run, results }) => `
|
|
155
160
|
Analyze this recipe for gluten content:
|
|
156
161
|
"${results.preprocessStepResult.recipeText}"
|
|
157
|
-
|
|
162
|
+
|
|
158
163
|
Look for wheat, barley, rye, and hidden sources like soy sauce.
|
|
159
164
|
Return JSON with isGlutenFree, glutenSources array, and confidence (0-1).
|
|
160
165
|
`
|
|
@@ -175,7 +180,7 @@ const glutenCheckerScorer = createScorer({...})
|
|
|
175
180
|
.analyze(...)
|
|
176
181
|
.generateScore(({ results }) => {
|
|
177
182
|
const { isGlutenFree, confidence } = results.analyzeStepResult;
|
|
178
|
-
|
|
183
|
+
|
|
179
184
|
// Return 1 for gluten-free, 0 for contains gluten
|
|
180
185
|
// Weight by confidence level
|
|
181
186
|
return isGlutenFree ? confidence : 0;
|
|
@@ -199,7 +204,7 @@ const glutenCheckerScorer = createScorer({...})
|
|
|
199
204
|
.generateScore(...)
|
|
200
205
|
.generateReason(({ results, score }) => {
|
|
201
206
|
const { isGlutenFree, glutenSources } = results.analyzeStepResult;
|
|
202
|
-
|
|
207
|
+
|
|
203
208
|
if (isGlutenFree) {
|
|
204
209
|
return `Score: ${score}. This recipe is gluten-free with no harmful ingredients detected.`;
|
|
205
210
|
} else {
|
|
@@ -220,14 +225,12 @@ const glutenCheckerScorer = createScorer({...})
|
|
|
220
225
|
createPrompt: ({ results, score }) => `
|
|
221
226
|
Explain why this recipe received a score of ${score}.
|
|
222
227
|
Analysis: ${JSON.stringify(results.analyzeStepResult)}
|
|
223
|
-
|
|
228
|
+
|
|
224
229
|
Provide a clear explanation for someone with dietary restrictions.
|
|
225
230
|
`
|
|
226
231
|
})
|
|
227
232
|
```
|
|
228
233
|
|
|
229
|
-
|
|
230
|
-
|
|
231
234
|
## Example: Create a custom scorer
|
|
232
235
|
|
|
233
236
|
A custom scorer in Mastra uses `createScorer` with four core components:
|
|
@@ -241,14 +244,18 @@ Together, these components allow you to define custom evaluation logic using LLM
|
|
|
241
244
|
|
|
242
245
|
> See [createScorer](/reference/scorers/create-scorer) for the full API and configuration options.
|
|
243
246
|
|
|
244
|
-
```typescript
|
|
245
|
-
import { openai } from
|
|
246
|
-
import { createScorer } from
|
|
247
|
-
import { z } from
|
|
247
|
+
```typescript title="src/mastra/scorers/gluten-checker.ts" showLineNumbers copy
|
|
248
|
+
import { openai } from "@ai-sdk/openai";
|
|
249
|
+
import { createScorer } from "@mastra/core/scores";
|
|
250
|
+
import { z } from "zod";
|
|
248
251
|
|
|
249
252
|
export const GLUTEN_INSTRUCTIONS = `You are a Chef that identifies if recipes contain gluten.`;
|
|
250
253
|
|
|
251
|
-
export const generateGlutenPrompt = ({
|
|
254
|
+
export const generateGlutenPrompt = ({
|
|
255
|
+
output,
|
|
256
|
+
}: {
|
|
257
|
+
output: string;
|
|
258
|
+
}) => `Check if this recipe is gluten-free.
|
|
252
259
|
|
|
253
260
|
Check for:
|
|
254
261
|
- Wheat
|
|
@@ -285,23 +292,23 @@ export const generateReasonPrompt = ({
|
|
|
285
292
|
}: {
|
|
286
293
|
isGlutenFree: boolean;
|
|
287
294
|
glutenSources: string[];
|
|
288
|
-
}) => `Explain why this recipe is${isGlutenFree ?
|
|
295
|
+
}) => `Explain why this recipe is${isGlutenFree ? "" : " not"} gluten-free.
|
|
289
296
|
|
|
290
|
-
${glutenSources.length > 0 ? `Sources of gluten: ${glutenSources.join(
|
|
297
|
+
${glutenSources.length > 0 ? `Sources of gluten: ${glutenSources.join(", ")}` : "No gluten-containing ingredients found"}
|
|
291
298
|
|
|
292
299
|
Return your response in this format:
|
|
293
300
|
"This recipe is [gluten-free/contains gluten] because [explanation]"`;
|
|
294
301
|
|
|
295
302
|
export const glutenCheckerScorer = createScorer({
|
|
296
|
-
name:
|
|
297
|
-
description:
|
|
303
|
+
name: "Gluten Checker",
|
|
304
|
+
description: "Check if the output contains any gluten",
|
|
298
305
|
judge: {
|
|
299
|
-
model: openai(
|
|
306
|
+
model: openai("gpt-4o"),
|
|
300
307
|
instructions: GLUTEN_INSTRUCTIONS,
|
|
301
308
|
},
|
|
302
309
|
})
|
|
303
310
|
.analyze({
|
|
304
|
-
description:
|
|
311
|
+
description: "Analyze the output for gluten",
|
|
305
312
|
outputSchema: z.object({
|
|
306
313
|
isGlutenFree: z.boolean(),
|
|
307
314
|
glutenSources: z.array(z.string()),
|
|
@@ -315,7 +322,7 @@ export const glutenCheckerScorer = createScorer({
|
|
|
315
322
|
return results.analyzeStepResult.isGlutenFree ? 1 : 0;
|
|
316
323
|
})
|
|
317
324
|
.generateReason({
|
|
318
|
-
description:
|
|
325
|
+
description: "Generate a reason for the score",
|
|
319
326
|
createPrompt: ({ results }) => {
|
|
320
327
|
return generateReasonPrompt({
|
|
321
328
|
glutenSources: results.analyzeStepResult.glutenSources,
|
|
@@ -355,6 +362,7 @@ Defines how the LLM should analyze the input and what structured output to retur
|
|
|
355
362
|
```
|
|
356
363
|
|
|
357
364
|
The analysis step uses a prompt object to:
|
|
365
|
+
|
|
358
366
|
- Provide a clear description of the analysis task
|
|
359
367
|
- Define the expected output structure with a Zod schema (both a boolean result and a list of gluten sources)
|
|
360
368
|
- Generate dynamic prompts based on the input content
|
|
@@ -388,11 +396,12 @@ Provides human-readable explanations for the score using another LLM call.
|
|
|
388
396
|
```
|
|
389
397
|
|
|
390
398
|
The reason generation step creates explanations that help users understand why a particular score was assigned, using both the boolean result and the specific gluten sources identified by the analysis step.
|
|
391
|
-
|
|
399
|
+
|
|
400
|
+
````
|
|
392
401
|
|
|
393
402
|
## High gluten-free example
|
|
394
403
|
|
|
395
|
-
```typescript
|
|
404
|
+
```typescript title="src/example-high-gluten-free.ts" showLineNumbers copy
|
|
396
405
|
const result = await glutenCheckerScorer.run({
|
|
397
406
|
input: [{ role: 'user', content: 'Mix rice, beans, and vegetables' }],
|
|
398
407
|
output: { text: 'Mix rice, beans, and vegetables' },
|
|
@@ -401,16 +410,16 @@ const result = await glutenCheckerScorer.run({
|
|
|
401
410
|
console.log('Score:', result.score);
|
|
402
411
|
console.log('Gluten sources:', result.analyzeStepResult.glutenSources);
|
|
403
412
|
console.log('Reason:', result.reason);
|
|
404
|
-
|
|
413
|
+
````
|
|
405
414
|
|
|
406
415
|
### High gluten-free output
|
|
407
416
|
|
|
408
417
|
```typescript
|
|
409
418
|
{
|
|
410
419
|
score: 1,
|
|
411
|
-
analyzeStepResult: {
|
|
420
|
+
analyzeStepResult: {
|
|
412
421
|
isGlutenFree: true,
|
|
413
|
-
glutenSources: []
|
|
422
|
+
glutenSources: []
|
|
414
423
|
},
|
|
415
424
|
reason: 'This recipe is gluten-free because rice, beans, and vegetables are naturally gluten-free ingredients that are safe for people with celiac disease.'
|
|
416
425
|
}
|
|
@@ -418,15 +427,15 @@ console.log('Reason:', result.reason);
|
|
|
418
427
|
|
|
419
428
|
## Partial gluten example
|
|
420
429
|
|
|
421
|
-
```typescript
|
|
430
|
+
```typescript title="src/example-partial-gluten.ts" showLineNumbers copy
|
|
422
431
|
const result = await glutenCheckerScorer.run({
|
|
423
|
-
input: [{ role:
|
|
424
|
-
output: { text:
|
|
432
|
+
input: [{ role: "user", content: "Mix flour and water to make dough" }],
|
|
433
|
+
output: { text: "Mix flour and water to make dough" },
|
|
425
434
|
});
|
|
426
435
|
|
|
427
|
-
console.log(
|
|
428
|
-
console.log(
|
|
429
|
-
console.log(
|
|
436
|
+
console.log("Score:", result.score);
|
|
437
|
+
console.log("Gluten sources:", result.analyzeStepResult.glutenSources);
|
|
438
|
+
console.log("Reason:", result.reason);
|
|
430
439
|
```
|
|
431
440
|
|
|
432
441
|
### Partial gluten output
|
|
@@ -434,9 +443,9 @@ console.log('Reason:', result.reason);
|
|
|
434
443
|
```typescript
|
|
435
444
|
{
|
|
436
445
|
score: 0,
|
|
437
|
-
analyzeStepResult: {
|
|
446
|
+
analyzeStepResult: {
|
|
438
447
|
isGlutenFree: false,
|
|
439
|
-
glutenSources: ['flour']
|
|
448
|
+
glutenSources: ['flour']
|
|
440
449
|
},
|
|
441
450
|
reason: 'This recipe is not gluten-free because it contains flour. Regular flour is made from wheat and contains gluten, making it unsafe for people with celiac disease or gluten sensitivity.'
|
|
442
451
|
}
|
|
@@ -444,15 +453,15 @@ console.log('Reason:', result.reason);
|
|
|
444
453
|
|
|
445
454
|
## Low gluten-free example
|
|
446
455
|
|
|
447
|
-
```typescript
|
|
456
|
+
```typescript title="src/example-low-gluten-free.ts" showLineNumbers copy
|
|
448
457
|
const result = await glutenCheckerScorer.run({
|
|
449
|
-
input: [{ role:
|
|
450
|
-
output: { text:
|
|
458
|
+
input: [{ role: "user", content: "Add soy sauce and noodles" }],
|
|
459
|
+
output: { text: "Add soy sauce and noodles" },
|
|
451
460
|
});
|
|
452
461
|
|
|
453
|
-
console.log(
|
|
454
|
-
console.log(
|
|
455
|
-
console.log(
|
|
462
|
+
console.log("Score:", result.score);
|
|
463
|
+
console.log("Gluten sources:", result.analyzeStepResult.glutenSources);
|
|
464
|
+
console.log("Reason:", result.reason);
|
|
456
465
|
```
|
|
457
466
|
|
|
458
467
|
### Low gluten-free output
|
|
@@ -460,14 +469,15 @@ console.log('Reason:', result.reason);
|
|
|
460
469
|
```typescript
|
|
461
470
|
{
|
|
462
471
|
score: 0,
|
|
463
|
-
analyzeStepResult: {
|
|
472
|
+
analyzeStepResult: {
|
|
464
473
|
isGlutenFree: false,
|
|
465
|
-
glutenSources: ['soy sauce', 'noodles']
|
|
474
|
+
glutenSources: ['soy sauce', 'noodles']
|
|
466
475
|
},
|
|
467
476
|
reason: 'This recipe is not gluten-free because it contains soy sauce, noodles. Regular soy sauce contains wheat and most noodles are made from wheat flour, both of which contain gluten and are unsafe for people with gluten sensitivity.'
|
|
468
477
|
}
|
|
469
478
|
```
|
|
470
479
|
|
|
471
480
|
**Examples and Resources:**
|
|
481
|
+
|
|
472
482
|
- [createScorer API Reference](/reference/scorers/create-scorer) - Complete technical documentation
|
|
473
483
|
- [Built-in Scorers Source Code](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers) - Real implementations for reference
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Create a
|
|
2
|
+
title: "Create a Custom Eval | Scorers | Mastra Docs"
|
|
3
3
|
description: "Mastra allows you to create your own evals, here is how."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import { ScorerCallout } from '@/components/scorer-callout'
|
|
7
|
-
|
|
8
6
|
# Create a Custom Eval
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
:::info Scorers
|
|
9
|
+
This documentation refers to the legacy evals API. For the latest scorer features, see [Scorers](/docs/scorers/overview).
|
|
10
|
+
:::
|
|
11
11
|
|
|
12
12
|
Create a custom eval by extending the `Metric` class and implementing the `measure` method. This gives you full control over how scores are calculated and what information is returned. For LLM-based evaluations, extend the `MastraAgentJudge` class to define how the model reasons and scores output.
|
|
13
13
|
|
|
@@ -15,12 +15,10 @@ Create a custom eval by extending the `Metric` class and implementing the `measu
|
|
|
15
15
|
|
|
16
16
|
You can write lightweight custom metrics using plain JavaScript/TypeScript. These are ideal for simple string comparisons, pattern checks, or other rule-based logic.
|
|
17
17
|
|
|
18
|
-
See our [Word Inclusion example](/examples/evals/custom-native-javascript-eval
|
|
18
|
+
See our [Word Inclusion example](/examples/evals/custom-native-javascript-eval), which scores responses based on the number of reference words found in the output.
|
|
19
19
|
|
|
20
20
|
## LLM as a judge evaluation
|
|
21
21
|
|
|
22
22
|
For more complex evaluations, you can build a judge powered by an LLM. This lets you capture more nuanced criteria, like factual accuracy, tone, or reasoning.
|
|
23
23
|
|
|
24
|
-
See the [Real World Countries example](/examples/evals/custom-llm-judge-eval
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
See the [Real World Countries example](/examples/evals/custom-llm-judge-eval) for a complete walkthrough of building a custom judge and metric that evaluates real-world factual accuracy.
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "
|
|
2
|
+
title: "Testing your agents with evals | Scorers | Mastra Docs"
|
|
3
3
|
description: "Understanding how to evaluate and measure AI agent quality using Mastra evals."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import { ScorerCallout } from '@/components/scorer-callout'
|
|
7
|
-
|
|
8
6
|
# Testing your agents with evals
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
:::info Scorers
|
|
9
|
+
This documentation refers to the legacy evals API. For the latest scorer features, see [Scorers](/docs/scorers/overview).
|
|
10
|
+
:::
|
|
11
11
|
|
|
12
12
|
While traditional software tests have clear pass/fail conditions, AI outputs are non-deterministic — they can vary with the same input. Evals help bridge this gap by providing quantifiable metrics for measuring agent quality.
|
|
13
13
|
|
|
@@ -35,7 +35,7 @@ npm install @mastra/evals@latest
|
|
|
35
35
|
|
|
36
36
|
Evals need to be added to an agent. Here's an example using the summarization, content similarity, and tone consistency metrics:
|
|
37
37
|
|
|
38
|
-
```typescript copy showLineNumbers
|
|
38
|
+
```typescript copy showLineNumbers title="src/mastra/agents/index.ts"
|
|
39
39
|
import { Agent } from "@mastra/core/agent";
|
|
40
40
|
import { openai } from "@ai-sdk/openai";
|
|
41
41
|
import { SummarizationMetric } from "@mastra/evals/llm";
|
|
@@ -99,8 +99,8 @@ Once you're hitting your targets:
|
|
|
99
99
|
3. Test edge cases - Add examples that cover unusual scenarios
|
|
100
100
|
4. Fine-tune - Look for ways to improve efficiency
|
|
101
101
|
|
|
102
|
-
See [Textual Evals](/docs/evals/textual-evals) for more info on what evals can do.
|
|
102
|
+
See [Textual Evals](/docs/scorers/evals-old-api/textual-evals) for more info on what evals can do.
|
|
103
103
|
|
|
104
|
-
For more info on how to create your own evals, see the [Custom Evals](/docs/evals/custom-eval) guide.
|
|
104
|
+
For more info on how to create your own evals, see the [Custom Evals](/docs/scorers/evals-old-api/custom-eval) guide.
|
|
105
105
|
|
|
106
|
-
For running evals in your CI pipeline, see the [Running in CI](/docs/evals/running-in-ci) guide.
|
|
106
|
+
For running evals in your CI pipeline, see the [Running in CI](/docs/scorers/evals-old-api/running-in-ci) guide.
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Running in CI"
|
|
2
|
+
title: "Running Evals in CI | Scorers | Mastra Docs"
|
|
3
3
|
description: "Learn how to run Mastra evals in your CI/CD pipeline to monitor agent quality over time."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import { ScorerCallout } from '@/components/scorer-callout'
|
|
7
|
-
|
|
8
6
|
# Running Evals in CI
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
:::info Scorers
|
|
9
|
+
This documentation refers to the legacy evals API. For the latest scorer features, see [Scorers](/docs/scorers/overview).
|
|
10
|
+
:::
|
|
11
11
|
|
|
12
12
|
Running evals in your CI pipeline gives you quantifiable metrics for tracking agent quality over time.
|
|
13
13
|
|
|
@@ -15,7 +15,7 @@ Running evals in your CI pipeline helps bridge this gap by providing quantifiabl
|
|
|
15
15
|
|
|
16
16
|
We support any testing framework that supports ES modules. For example, you can use [Vitest](https://vitest.dev/), [Jest](https://jestjs.io/) or [Mocha](https://mochajs.org/) to run evals in your CI/CD pipeline.
|
|
17
17
|
|
|
18
|
-
```typescript copy showLineNumbers
|
|
18
|
+
```typescript copy showLineNumbers title="src/mastra/agents/index.test.ts"
|
|
19
19
|
import { describe, it, expect } from "vitest";
|
|
20
20
|
import { evaluate } from "@mastra/evals";
|
|
21
21
|
import { ToneConsistencyMetric } from "@mastra/evals/nlp";
|
|
@@ -39,7 +39,7 @@ You will need to configure a testSetup and globalSetup script for your testing f
|
|
|
39
39
|
|
|
40
40
|
Add these files to your project to run evals in your CI/CD pipeline:
|
|
41
41
|
|
|
42
|
-
```typescript copy showLineNumbers
|
|
42
|
+
```typescript copy showLineNumbers title="globalSetup.ts"
|
|
43
43
|
import { globalSetup } from "@mastra/evals";
|
|
44
44
|
|
|
45
45
|
export default function setup() {
|
|
@@ -47,7 +47,7 @@ export default function setup() {
|
|
|
47
47
|
}
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
```typescript copy showLineNumbers
|
|
50
|
+
```typescript copy showLineNumbers title="testSetup.ts"
|
|
51
51
|
import { beforeAll } from "vitest";
|
|
52
52
|
import { attachListeners } from "@mastra/evals";
|
|
53
53
|
|
|
@@ -56,7 +56,7 @@ beforeAll(async () => {
|
|
|
56
56
|
});
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
-
```typescript copy showLineNumbers
|
|
59
|
+
```typescript copy showLineNumbers title="vitest.config.ts"
|
|
60
60
|
import { defineConfig } from "vitest/config";
|
|
61
61
|
|
|
62
62
|
export default defineConfig({
|
|
@@ -71,7 +71,7 @@ export default defineConfig({
|
|
|
71
71
|
|
|
72
72
|
To store eval results in Mastra Storage and capture results in the Mastra dashboard:
|
|
73
73
|
|
|
74
|
-
```typescript copy showLineNumbers
|
|
74
|
+
```typescript copy showLineNumbers title="testSetup.ts"
|
|
75
75
|
import { beforeAll } from "vitest";
|
|
76
76
|
import { attachListeners } from "@mastra/evals";
|
|
77
77
|
import { mastra } from "./your-mastra-setup";
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Textual Evals"
|
|
2
|
+
title: "Textual Evals | Scorers | Mastra Docs"
|
|
3
3
|
description: "Understand how Mastra uses LLM-as-judge methodology to evaluate text quality."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import { ScorerCallout } from '@/components/scorer-callout'
|
|
7
|
-
|
|
8
6
|
# Textual Evals
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
:::info Scorers
|
|
9
|
+
This documentation refers to the legacy evals API. For the latest scorer features, see [Scorers](/docs/scorers/overview).
|
|
10
|
+
:::
|
|
11
11
|
|
|
12
12
|
Textual evals use an LLM-as-judge methodology to evaluate agent outputs. This approach leverages language models to assess various aspects of text quality, similar to how a teaching assistant might grade assignments using a rubric.
|
|
13
13
|
|
|
14
14
|
Each eval focuses on specific quality aspects and returns a score between 0 and 1, providing quantifiable metrics for non-deterministic AI outputs.
|
|
15
15
|
|
|
16
|
-
Mastra provides several eval metrics for assessing Agent outputs. Mastra is not limited to these metrics, and you can also [define your own evals](/docs/evals/custom-eval).
|
|
16
|
+
Mastra provides several eval metrics for assessing Agent outputs. Mastra is not limited to these metrics, and you can also [define your own evals](/docs/scorers/evals-old-api/custom-eval).
|
|
17
17
|
|
|
18
18
|
## Why Use Textual Evals?
|
|
19
19
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Built-in Scorers"
|
|
2
|
+
title: "Built-in Scorers | Scorers | Mastra Docs"
|
|
3
3
|
description: "Overview of Mastra's ready-to-use scorers for evaluating AI outputs across quality, safety, and performance dimensions."
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -31,10 +31,12 @@ These scorers evaluate the quality and relevance of context used in generating r
|
|
|
31
31
|
- [`context-relevance`](/reference/scorers/context-relevance): Measures context utility with nuanced relevance levels, usage tracking, and missing context detection (`0-1`, higher is better)
|
|
32
32
|
|
|
33
33
|
> **Tip: Context Scorer Selection**
|
|
34
|
+
|
|
34
35
|
- Use **Context Precision** when context ordering matters and you need standard IR metrics (ideal for RAG ranking evaluation)
|
|
35
36
|
- Use **Context Relevance** when you need detailed relevance assessment and want to track context usage and identify gaps
|
|
36
37
|
|
|
37
38
|
Both context scorers support:
|
|
39
|
+
|
|
38
40
|
- **Static context**: Pre-defined context arrays
|
|
39
41
|
- **Dynamic context extraction**: Extract context from runs using custom functions (ideal for RAG systems, vector databases, etc.)
|
|
40
42
|
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "
|
|
2
|
+
title: "Scorers overview | Scorers | Mastra Docs"
|
|
3
3
|
description: Overview of scorers in Mastra, detailing their capabilities for evaluating AI outputs and measuring performance.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import { Callout } from "nextra/components";
|
|
7
|
-
|
|
8
6
|
# Scorers overview
|
|
9
7
|
|
|
10
8
|
While traditional software tests have clear pass/fail conditions, AI outputs are non-deterministic — they can vary with the same input. **Scorers** help bridge this gap by providing quantifiable metrics for measuring agent quality.
|
|
@@ -37,12 +35,12 @@ npm install @mastra/evals@latest
|
|
|
37
35
|
|
|
38
36
|
You can add built-in scorers to your agents to automatically evaluate their outputs. See the [full list of built-in scorers](/docs/scorers/off-the-shelf-scorers) for all available options.
|
|
39
37
|
|
|
40
|
-
```typescript
|
|
38
|
+
```typescript title="src/mastra/agents/evaluated-agent.ts" showLineNumbers copy
|
|
41
39
|
import { Agent } from "@mastra/core/agent";
|
|
42
40
|
import { openai } from "@ai-sdk/openai";
|
|
43
|
-
import {
|
|
41
|
+
import {
|
|
44
42
|
createAnswerRelevancyScorer,
|
|
45
|
-
createToxicityScorer
|
|
43
|
+
createToxicityScorer,
|
|
46
44
|
} from "@mastra/evals/scorers/llm";
|
|
47
45
|
|
|
48
46
|
export const evaluatedAgent = new Agent({
|
|
@@ -50,13 +48,13 @@ export const evaluatedAgent = new Agent({
|
|
|
50
48
|
scorers: {
|
|
51
49
|
relevancy: {
|
|
52
50
|
scorer: createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") }),
|
|
53
|
-
sampling: { type: "ratio", rate: 0.5 }
|
|
51
|
+
sampling: { type: "ratio", rate: 0.5 },
|
|
54
52
|
},
|
|
55
53
|
safety: {
|
|
56
54
|
scorer: createToxicityScorer({ model: openai("gpt-4o-mini") }),
|
|
57
|
-
sampling: { type: "ratio", rate: 1 }
|
|
58
|
-
}
|
|
59
|
-
}
|
|
55
|
+
sampling: { type: "ratio", rate: 1 },
|
|
56
|
+
},
|
|
57
|
+
},
|
|
60
58
|
});
|
|
61
59
|
```
|
|
62
60
|
|
|
@@ -64,7 +62,7 @@ export const evaluatedAgent = new Agent({
|
|
|
64
62
|
|
|
65
63
|
You can also add scorers to individual workflow steps to evaluate outputs at specific points in your process:
|
|
66
64
|
|
|
67
|
-
```typescript
|
|
65
|
+
```typescript title="src/mastra/workflows/content-generation.ts" showLineNumbers copy
|
|
68
66
|
import { createWorkflow, createStep } from "@mastra/core/workflows";
|
|
69
67
|
import { z } from "zod";
|
|
70
68
|
import { customStepScorer } from "../scorers/custom-step-scorer";
|
|
@@ -92,8 +90,9 @@ export const contentWorkflow = createWorkflow({ ... })
|
|
|
92
90
|
**Asynchronous execution**: Live evaluations run in the background without blocking your agent responses or workflow execution. This ensures your AI systems maintain their performance while still being monitored.
|
|
93
91
|
|
|
94
92
|
**Sampling control**: The `sampling.rate` parameter (0-1) controls what percentage of outputs get scored:
|
|
93
|
+
|
|
95
94
|
- `1.0`: Score every single response (100%)
|
|
96
|
-
- `0.5`: Score half of all responses (50%)
|
|
95
|
+
- `0.5`: Score half of all responses (50%)
|
|
97
96
|
- `0.1`: Score 10% of responses
|
|
98
97
|
- `0.0`: Disable scoring
|
|
99
98
|
|
|
@@ -103,11 +102,13 @@ export const contentWorkflow = createWorkflow({ ... })
|
|
|
103
102
|
|
|
104
103
|
In addition to live evaluations, you can use scorers to evaluate historical traces from your agent interactions and workflows. This is particularly useful for analyzing past performance, debugging issues, or running batch evaluations.
|
|
105
104
|
|
|
106
|
-
|
|
105
|
+
:::info
|
|
106
|
+
|
|
107
107
|
**Observability Required**
|
|
108
108
|
|
|
109
|
-
To score traces, you must first configure observability in your Mastra instance to collect trace data. See [AI Tracing documentation](../observability/ai-tracing) for setup instructions.
|
|
110
|
-
|
|
109
|
+
To score traces, you must first configure observability in your Mastra instance to collect trace data. See [AI Tracing documentation](../observability/ai-tracing/overview) for setup instructions.
|
|
110
|
+
|
|
111
|
+
:::
|
|
111
112
|
|
|
112
113
|
### Scoring traces with the playground
|
|
113
114
|
|
|
@@ -118,8 +119,8 @@ const mastra = new Mastra({
|
|
|
118
119
|
// ...
|
|
119
120
|
scorers: {
|
|
120
121
|
answerRelevancy: myAnswerRelevancyScorer,
|
|
121
|
-
responseQuality: myResponseQualityScorer
|
|
122
|
-
}
|
|
122
|
+
responseQuality: myResponseQualityScorer,
|
|
123
|
+
},
|
|
123
124
|
});
|
|
124
125
|
```
|
|
125
126
|
|
|
@@ -129,10 +130,10 @@ Once registered, you can score traces interactively within the Mastra playground
|
|
|
129
130
|
|
|
130
131
|
Mastra provides a CLI command `mastra dev` to test your scorers. The playground includes a scorers section where you can run individual scorers against test inputs and view detailed results.
|
|
131
132
|
|
|
132
|
-
For more details, see the [Local Dev Playground](/docs/
|
|
133
|
+
For more details, see the [Local Dev Playground](/docs/getting-started/studio) docs.
|
|
133
134
|
|
|
134
135
|
## Next steps
|
|
135
136
|
|
|
136
137
|
- Learn how to create your own scorers in the [Creating Custom Scorers](/docs/scorers/custom-scorers) guide
|
|
137
138
|
- Explore built-in scorers in the [Off-the-shelf Scorers](/docs/scorers/off-the-shelf-scorers) section
|
|
138
|
-
- Test scorers with the [Local Dev Playground](/docs/
|
|
139
|
+
- Test scorers with the [Local Dev Playground](/docs/getting-started/studio)
|