@mastra/mcp-docs-server 0.13.37 → 0.13.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +8 -8
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +69 -69
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Freact.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +23 -23
- package/.docs/organized/changelogs/create-mastra.md +5 -5
- package/.docs/organized/changelogs/mastra.md +17 -17
- package/.docs/raw/agents/adding-voice.mdx +20 -9
- package/.docs/raw/agents/agent-memory.mdx +55 -39
- package/.docs/raw/agents/guardrails.mdx +68 -61
- package/.docs/raw/agents/networks.mdx +50 -46
- package/.docs/raw/agents/overview.mdx +125 -88
- package/.docs/raw/agents/using-tools.mdx +14 -15
- package/.docs/raw/auth/auth0.mdx +28 -27
- package/.docs/raw/auth/clerk.mdx +22 -20
- package/.docs/raw/auth/firebase.mdx +42 -39
- package/.docs/raw/auth/index.mdx +1 -1
- package/.docs/raw/auth/jwt.mdx +18 -16
- package/.docs/raw/auth/supabase.mdx +20 -18
- package/.docs/raw/auth/workos.mdx +32 -26
- package/.docs/raw/community/contributing-templates.mdx +7 -7
- package/.docs/raw/community/discord.mdx +2 -2
- package/.docs/raw/community/licensing.mdx +1 -1
- package/.docs/raw/course/03-agent-memory/26-updating-mastra-export-comprehensive.md +0 -32
- package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +41 -22
- package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +48 -29
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +52 -24
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +86 -55
- package/.docs/raw/deployment/cloud-providers/index.mdx +16 -13
- package/.docs/raw/deployment/monorepo.mdx +32 -42
- package/.docs/raw/deployment/overview.mdx +15 -15
- package/.docs/raw/deployment/server-deployment.mdx +18 -23
- package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +16 -17
- package/.docs/raw/deployment/serverless-platforms/index.mdx +15 -12
- package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +14 -23
- package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +16 -23
- package/.docs/raw/deployment/web-framework.mdx +14 -14
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +138 -145
- package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +54 -43
- package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +51 -36
- package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +161 -120
- package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +59 -48
- package/.docs/raw/frameworks/servers/express.mdx +45 -44
- package/.docs/raw/frameworks/web-frameworks/astro.mdx +245 -162
- package/.docs/raw/frameworks/web-frameworks/next-js.mdx +112 -69
- package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +189 -164
- package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +94 -81
- package/.docs/raw/getting-started/installation.mdx +164 -133
- package/.docs/raw/getting-started/mcp-docs-server.mdx +36 -39
- package/.docs/raw/getting-started/project-structure.mdx +34 -42
- package/.docs/raw/getting-started/studio.mdx +40 -58
- package/.docs/raw/getting-started/templates.mdx +22 -27
- package/.docs/raw/index.mdx +9 -9
- package/.docs/raw/mastra-cloud/dashboard.mdx +10 -10
- package/.docs/raw/mastra-cloud/observability.mdx +7 -8
- package/.docs/raw/mastra-cloud/overview.mdx +16 -22
- package/.docs/raw/mastra-cloud/setting-up.mdx +33 -20
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/overview.mdx +21 -23
- package/.docs/raw/memory/semantic-recall.mdx +14 -13
- package/.docs/raw/memory/storage/memory-with-libsql.mdx +27 -28
- package/.docs/raw/memory/storage/memory-with-pg.mdx +26 -26
- package/.docs/raw/memory/storage/memory-with-upstash.mdx +26 -27
- package/.docs/raw/memory/threads-and-resources.mdx +23 -20
- package/.docs/raw/memory/working-memory.mdx +27 -38
- package/.docs/raw/observability/ai-tracing/exporters/arize.mdx +30 -29
- package/.docs/raw/observability/ai-tracing/exporters/braintrust.mdx +8 -9
- package/.docs/raw/observability/ai-tracing/exporters/cloud.mdx +17 -16
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +31 -32
- package/.docs/raw/observability/ai-tracing/exporters/langfuse.mdx +18 -17
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +14 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +56 -46
- package/.docs/raw/observability/ai-tracing/overview.mdx +145 -122
- package/.docs/raw/observability/ai-tracing/processors/sensitive-data-filter.mdx +57 -36
- package/.docs/raw/observability/logging.mdx +14 -17
- package/.docs/raw/observability/nextjs-tracing.mdx +5 -5
- package/.docs/raw/observability/otel-tracing.mdx +17 -18
- package/.docs/raw/observability/overview.mdx +14 -10
- package/.docs/raw/rag/chunking-and-embedding.mdx +10 -8
- package/.docs/raw/rag/overview.mdx +5 -5
- package/.docs/raw/rag/retrieval.mdx +125 -107
- package/.docs/raw/rag/vector-databases.mdx +232 -223
- package/.docs/raw/reference/agents/agent.mdx +54 -41
- package/.docs/raw/reference/agents/generate.mdx +367 -261
- package/.docs/raw/reference/agents/generateLegacy.mdx +260 -178
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDescription.mdx +3 -2
- package/.docs/raw/reference/agents/getInstructions.mdx +7 -5
- package/.docs/raw/reference/agents/getLLM.mdx +11 -7
- package/.docs/raw/reference/agents/getMemory.mdx +7 -5
- package/.docs/raw/reference/agents/getModel.mdx +7 -5
- package/.docs/raw/reference/agents/getScorers.mdx +7 -5
- package/.docs/raw/reference/agents/getTools.mdx +7 -5
- package/.docs/raw/reference/agents/getVoice.mdx +7 -5
- package/.docs/raw/reference/agents/getWorkflows.mdx +7 -5
- package/.docs/raw/reference/agents/listAgents.mdx +6 -4
- package/.docs/raw/reference/agents/network.mdx +171 -116
- package/.docs/raw/reference/auth/auth0.mdx +18 -14
- package/.docs/raw/reference/auth/clerk.mdx +15 -12
- package/.docs/raw/reference/auth/firebase.mdx +23 -16
- package/.docs/raw/reference/auth/jwt.mdx +7 -6
- package/.docs/raw/reference/auth/supabase.mdx +13 -10
- package/.docs/raw/reference/auth/workos.mdx +17 -13
- package/.docs/raw/reference/cli/create-mastra.mdx +61 -44
- package/.docs/raw/reference/cli/mastra.mdx +11 -11
- package/.docs/raw/reference/client-js/agents.mdx +40 -43
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -1
- package/.docs/raw/reference/client-js/logs.mdx +1 -1
- package/.docs/raw/reference/client-js/mastra-client.mdx +21 -13
- package/.docs/raw/reference/client-js/memory.mdx +6 -2
- package/.docs/raw/reference/client-js/observability.mdx +10 -9
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -1
- package/.docs/raw/reference/client-js/tools.mdx +1 -1
- package/.docs/raw/reference/client-js/vectors.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows.mdx +7 -7
- package/.docs/raw/reference/core/getAgent.mdx +6 -5
- package/.docs/raw/reference/core/getAgentById.mdx +5 -4
- package/.docs/raw/reference/core/getAgents.mdx +3 -3
- package/.docs/raw/reference/core/getDeployer.mdx +4 -3
- package/.docs/raw/reference/core/getLogger.mdx +4 -3
- package/.docs/raw/reference/core/getLogs.mdx +10 -6
- package/.docs/raw/reference/core/getLogsByRunId.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServer.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServers.mdx +4 -3
- package/.docs/raw/reference/core/getMemory.mdx +4 -3
- package/.docs/raw/reference/core/getScorer.mdx +15 -12
- package/.docs/raw/reference/core/getScorerByName.mdx +12 -9
- package/.docs/raw/reference/core/getScorers.mdx +3 -2
- package/.docs/raw/reference/core/getServer.mdx +4 -3
- package/.docs/raw/reference/core/getStorage.mdx +4 -3
- package/.docs/raw/reference/core/getTelemetry.mdx +4 -3
- package/.docs/raw/reference/core/getVector.mdx +6 -4
- package/.docs/raw/reference/core/getVectors.mdx +4 -3
- package/.docs/raw/reference/core/getWorkflow.mdx +7 -4
- package/.docs/raw/reference/core/getWorkflows.mdx +5 -3
- package/.docs/raw/reference/core/mastra-class.mdx +16 -16
- package/.docs/raw/reference/core/setLogger.mdx +6 -4
- package/.docs/raw/reference/core/setStorage.mdx +4 -4
- package/.docs/raw/reference/core/setTelemetry.mdx +4 -3
- package/.docs/raw/reference/deployer/cloudflare.mdx +11 -7
- package/.docs/raw/reference/deployer/deployer.mdx +2 -1
- package/.docs/raw/reference/deployer/netlify.mdx +4 -4
- package/.docs/raw/reference/deployer/vercel.mdx +6 -6
- package/.docs/raw/reference/evals/answer-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/bias.mdx +4 -5
- package/.docs/raw/reference/evals/completeness.mdx +4 -5
- package/.docs/raw/reference/evals/content-similarity.mdx +4 -5
- package/.docs/raw/reference/evals/context-position.mdx +4 -5
- package/.docs/raw/reference/evals/context-precision.mdx +4 -5
- package/.docs/raw/reference/evals/context-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/contextual-recall.mdx +4 -5
- package/.docs/raw/reference/evals/faithfulness.mdx +4 -5
- package/.docs/raw/reference/evals/hallucination.mdx +4 -6
- package/.docs/raw/reference/evals/keyword-coverage.mdx +4 -5
- package/.docs/raw/reference/evals/prompt-alignment.mdx +4 -6
- package/.docs/raw/reference/evals/summarization.mdx +4 -6
- package/.docs/raw/reference/evals/textual-difference.mdx +4 -5
- package/.docs/raw/reference/evals/tone-consistency.mdx +4 -7
- package/.docs/raw/reference/evals/toxicity.mdx +4 -5
- package/.docs/raw/reference/index.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/after.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/commit.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/createRun.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/else.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/events.mdx +7 -7
- package/.docs/raw/reference/legacyWorkflows/execute.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/if.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resume.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/start.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-class.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-function.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/step-options.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/suspend.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/then.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/until.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/watch.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/while.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/workflow.mdx +7 -7
- package/.docs/raw/reference/memory/createThread.mdx +10 -10
- package/.docs/raw/reference/memory/deleteMessages.mdx +7 -5
- package/.docs/raw/reference/memory/getThreadById.mdx +6 -5
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +11 -10
- package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +10 -9
- package/.docs/raw/reference/memory/{Memory.mdx → memory-class.mdx} +53 -46
- package/.docs/raw/reference/memory/query.mdx +39 -25
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +9 -5
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +26 -22
- package/.docs/raw/reference/observability/ai-tracing/exporters/arize.mdx +16 -12
- package/.docs/raw/reference/observability/ai-tracing/exporters/braintrust.mdx +14 -14
- package/.docs/raw/reference/observability/ai-tracing/exporters/cloud-exporter.mdx +16 -11
- package/.docs/raw/reference/observability/ai-tracing/exporters/console-exporter.mdx +10 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/default-exporter.mdx +23 -15
- package/.docs/raw/reference/observability/ai-tracing/exporters/langfuse.mdx +6 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +13 -13
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +44 -36
- package/.docs/raw/reference/observability/ai-tracing/interfaces.mdx +115 -106
- package/.docs/raw/reference/observability/ai-tracing/processors/sensitive-data-filter.mdx +37 -32
- package/.docs/raw/reference/observability/ai-tracing/span.mdx +29 -26
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +13 -15
- package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +2 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +12 -14
- package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +1 -1
- package/.docs/raw/reference/processors/batch-parts-processor.mdx +13 -10
- package/.docs/raw/reference/processors/language-detector.mdx +27 -17
- package/.docs/raw/reference/processors/moderation-processor.mdx +26 -17
- package/.docs/raw/reference/processors/pii-detector.mdx +28 -18
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +25 -17
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +26 -17
- package/.docs/raw/reference/processors/token-limiter-processor.mdx +22 -15
- package/.docs/raw/reference/processors/unicode-normalizer.mdx +13 -12
- package/.docs/raw/reference/rag/chunk.mdx +41 -23
- package/.docs/raw/reference/rag/database-config.mdx +56 -38
- package/.docs/raw/reference/rag/document.mdx +1 -1
- package/.docs/raw/reference/rag/embeddings.mdx +1 -1
- package/.docs/raw/reference/rag/extract-params.mdx +1 -1
- package/.docs/raw/reference/rag/graph-rag.mdx +1 -1
- package/.docs/raw/reference/rag/metadata-filters.mdx +23 -26
- package/.docs/raw/reference/rag/rerank.mdx +1 -1
- package/.docs/raw/reference/rag/rerankWithScorer.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +35 -17
- package/.docs/raw/reference/scorers/answer-similarity.mdx +84 -68
- package/.docs/raw/reference/scorers/bias.mdx +22 -19
- package/.docs/raw/reference/scorers/completeness.mdx +21 -16
- package/.docs/raw/reference/scorers/content-similarity.mdx +12 -10
- package/.docs/raw/reference/scorers/context-precision.mdx +73 -64
- package/.docs/raw/reference/scorers/context-relevance.mdx +142 -126
- package/.docs/raw/reference/scorers/create-scorer.mdx +93 -61
- package/.docs/raw/reference/scorers/faithfulness.mdx +21 -13
- package/.docs/raw/reference/scorers/hallucination.mdx +17 -12
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +32 -27
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +39 -33
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +203 -152
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +186 -132
- package/.docs/raw/reference/scorers/run-experiment.mdx +40 -31
- package/.docs/raw/reference/scorers/textual-difference.mdx +25 -26
- package/.docs/raw/reference/scorers/tone-consistency.mdx +29 -26
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +180 -176
- package/.docs/raw/reference/scorers/toxicity.mdx +35 -31
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
- package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
- package/.docs/raw/reference/storage/dynamodb.mdx +1 -1
- package/.docs/raw/reference/storage/lance.mdx +1 -1
- package/.docs/raw/reference/storage/libsql.mdx +2 -2
- package/.docs/raw/reference/storage/mongodb.mdx +4 -5
- package/.docs/raw/reference/storage/mssql.mdx +5 -4
- package/.docs/raw/reference/storage/postgresql.mdx +35 -33
- package/.docs/raw/reference/storage/upstash.mdx +6 -5
- package/.docs/raw/reference/streaming/ChunkType.mdx +788 -314
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +265 -109
- package/.docs/raw/reference/streaming/agents/stream.mdx +375 -266
- package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +233 -162
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +4 -4
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +5 -5
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +24 -20
- package/.docs/raw/reference/streaming/workflows/stream.mdx +35 -26
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +36 -27
- package/.docs/raw/reference/templates/overview.mdx +16 -39
- package/.docs/raw/reference/tools/client.mdx +1 -1
- package/.docs/raw/reference/tools/create-tool.mdx +45 -35
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +2 -2
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +12 -12
- package/.docs/raw/reference/tools/mcp-client.mdx +70 -64
- package/.docs/raw/reference/tools/mcp-server.mdx +91 -78
- package/.docs/raw/reference/tools/vector-query-tool.mdx +48 -38
- package/.docs/raw/reference/vectors/astra.mdx +1 -1
- package/.docs/raw/reference/vectors/chroma.mdx +25 -19
- package/.docs/raw/reference/vectors/couchbase.mdx +4 -4
- package/.docs/raw/reference/vectors/lance.mdx +5 -6
- package/.docs/raw/reference/vectors/libsql.mdx +1 -1
- package/.docs/raw/reference/vectors/mongodb.mdx +1 -1
- package/.docs/raw/reference/vectors/opensearch.mdx +1 -1
- package/.docs/raw/reference/vectors/pg.mdx +8 -4
- package/.docs/raw/reference/vectors/pinecone.mdx +1 -1
- package/.docs/raw/reference/vectors/qdrant.mdx +1 -1
- package/.docs/raw/reference/vectors/s3vectors.mdx +35 -27
- package/.docs/raw/reference/vectors/turbopuffer.mdx +1 -1
- package/.docs/raw/reference/vectors/upstash.mdx +33 -25
- package/.docs/raw/reference/vectors/vectorize.mdx +1 -1
- package/.docs/raw/reference/voice/azure.mdx +1 -1
- package/.docs/raw/reference/voice/cloudflare.mdx +1 -1
- package/.docs/raw/reference/voice/composite-voice.mdx +1 -1
- package/.docs/raw/reference/voice/deepgram.mdx +1 -1
- package/.docs/raw/reference/voice/elevenlabs.mdx +1 -1
- package/.docs/raw/reference/voice/google-gemini-live.mdx +6 -4
- package/.docs/raw/reference/voice/google.mdx +1 -1
- package/.docs/raw/reference/voice/mastra-voice.mdx +1 -1
- package/.docs/raw/reference/voice/murf.mdx +1 -1
- package/.docs/raw/reference/voice/openai-realtime.mdx +1 -1
- package/.docs/raw/reference/voice/openai.mdx +1 -1
- package/.docs/raw/reference/voice/playai.mdx +1 -1
- package/.docs/raw/reference/voice/sarvam.mdx +1 -1
- package/.docs/raw/reference/voice/speechify.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addInstructions.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
- package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
- package/.docs/raw/reference/voice/voice.close.mdx +1 -1
- package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
- package/.docs/raw/reference/voice/voice.events.mdx +1 -1
- package/.docs/raw/reference/voice/voice.getSpeakers.mdx +23 -30
- package/.docs/raw/reference/voice/voice.listen.mdx +1 -1
- package/.docs/raw/reference/voice/voice.off.mdx +1 -1
- package/.docs/raw/reference/voice/voice.on.mdx +1 -1
- package/.docs/raw/reference/voice/voice.send.mdx +1 -1
- package/.docs/raw/reference/voice/voice.speak.mdx +1 -1
- package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
- package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -3
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +49 -34
- package/.docs/raw/reference/workflows/run-methods/start.mdx +43 -31
- package/.docs/raw/reference/workflows/run-methods/watch.mdx +7 -8
- package/.docs/raw/reference/workflows/run.mdx +7 -10
- package/.docs/raw/reference/workflows/step.mdx +15 -12
- package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -7
- package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/map.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +5 -5
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +3 -3
- package/.docs/raw/reference/workflows/workflow.mdx +13 -10
- package/.docs/raw/scorers/custom-scorers.mdx +58 -48
- package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +6 -8
- package/.docs/raw/scorers/evals-old-api/overview.mdx +8 -8
- package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +9 -9
- package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +5 -5
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +3 -1
- package/.docs/raw/scorers/overview.mdx +20 -19
- package/.docs/raw/server-db/custom-api-routes.mdx +8 -8
- package/.docs/raw/server-db/mastra-client.mdx +56 -54
- package/.docs/raw/server-db/middleware.mdx +11 -7
- package/.docs/raw/server-db/production-server.mdx +5 -7
- package/.docs/raw/server-db/runtime-context.mdx +41 -38
- package/.docs/raw/server-db/storage.mdx +82 -87
- package/.docs/raw/streaming/events.mdx +24 -16
- package/.docs/raw/streaming/overview.mdx +17 -18
- package/.docs/raw/streaming/tool-streaming.mdx +9 -10
- package/.docs/raw/streaming/workflow-streaming.mdx +14 -12
- package/.docs/raw/tools-mcp/advanced-usage.mdx +2 -2
- package/.docs/raw/tools-mcp/mcp-overview.mdx +92 -102
- package/.docs/raw/tools-mcp/overview.mdx +9 -14
- package/.docs/raw/voice/overview.mdx +273 -250
- package/.docs/raw/voice/speech-to-speech.mdx +14 -12
- package/.docs/raw/voice/speech-to-text.mdx +2 -2
- package/.docs/raw/voice/text-to-speech.mdx +2 -2
- package/.docs/raw/workflows/agents-and-tools.mdx +29 -28
- package/.docs/raw/workflows/control-flow.mdx +24 -24
- package/.docs/raw/workflows/error-handling.mdx +15 -17
- package/.docs/raw/workflows/human-in-the-loop.mdx +39 -39
- package/.docs/raw/workflows/inngest-workflow.mdx +33 -29
- package/.docs/raw/workflows/input-data-mapping.mdx +9 -9
- package/.docs/raw/workflows/overview.mdx +60 -60
- package/.docs/raw/workflows/snapshots.mdx +54 -36
- package/.docs/raw/workflows/suspend-and-resume.mdx +52 -57
- package/.docs/raw/workflows-legacy/control-flow.mdx +15 -17
- package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +3 -1
- package/.docs/raw/workflows-legacy/error-handling.mdx +8 -6
- package/.docs/raw/workflows-legacy/nested-workflows.mdx +6 -0
- package/.docs/raw/workflows-legacy/overview.mdx +28 -26
- package/.docs/raw/workflows-legacy/runtime-variables.mdx +4 -2
- package/.docs/raw/workflows-legacy/steps.mdx +5 -3
- package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +10 -8
- package/.docs/raw/workflows-legacy/variables.mdx +10 -8
- package/CHANGELOG.md +14 -0
- package/package.json +5 -5
- package/.docs/raw/memory/storage/memory-with-mongodb.mdx +0 -148
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Answer Relevancy | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Answer Relevancy Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Answer Relevancy Scorer in Mastra, which evaluates how well LLM outputs address the input query.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -53,7 +53,8 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
53
53
|
{
|
|
54
54
|
name: "preprocessPrompt",
|
|
55
55
|
type: "string",
|
|
56
|
-
description:
|
|
56
|
+
description:
|
|
57
|
+
"The prompt sent to the LLM for the preprocess step (optional).",
|
|
57
58
|
},
|
|
58
59
|
{
|
|
59
60
|
name: "preprocessStepResult",
|
|
@@ -63,12 +64,14 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
63
64
|
{
|
|
64
65
|
name: "analyzePrompt",
|
|
65
66
|
type: "string",
|
|
66
|
-
description:
|
|
67
|
+
description:
|
|
68
|
+
"The prompt sent to the LLM for the analyze step (optional).",
|
|
67
69
|
},
|
|
68
70
|
{
|
|
69
71
|
name: "analyzeStepResult",
|
|
70
72
|
type: "object",
|
|
71
|
-
description:
|
|
73
|
+
description:
|
|
74
|
+
"Object with results: { results: Array<{ result: 'yes' | 'unsure' | 'no', reason: string }> }",
|
|
72
75
|
},
|
|
73
76
|
{
|
|
74
77
|
name: "generateReasonPrompt",
|
|
@@ -115,13 +118,20 @@ A relevancy score between 0 and 1:
|
|
|
115
118
|
|
|
116
119
|
In this example, the response accurately addresses the input query with specific and relevant information.
|
|
117
120
|
|
|
118
|
-
```typescript
|
|
121
|
+
```typescript title="src/example-high-answer-relevancy.ts" showLineNumbers copy
|
|
119
122
|
import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
|
|
120
123
|
|
|
121
|
-
const scorer = createAnswerRelevancyScorer({ model:
|
|
124
|
+
const scorer = createAnswerRelevancyScorer({ model: "openai/gpt-4o-mini" });
|
|
122
125
|
|
|
123
|
-
const inputMessages = [
|
|
124
|
-
|
|
126
|
+
const inputMessages = [
|
|
127
|
+
{
|
|
128
|
+
role: "user",
|
|
129
|
+
content: "What are the health benefits of regular exercise?",
|
|
130
|
+
},
|
|
131
|
+
];
|
|
132
|
+
const outputMessage = {
|
|
133
|
+
text: "Regular exercise improves cardiovascular health, strengthens muscles, boosts metabolism, and enhances mental well-being through the release of endorphins.",
|
|
134
|
+
};
|
|
125
135
|
|
|
126
136
|
const result = await scorer.run({
|
|
127
137
|
input: inputMessages,
|
|
@@ -146,13 +156,17 @@ The output receives a high score because it accurately answers the query without
|
|
|
146
156
|
|
|
147
157
|
In this example, the response addresses the query in part but includes additional information that isn’t directly relevant.
|
|
148
158
|
|
|
149
|
-
```typescript
|
|
159
|
+
```typescript title="src/example-partial-answer-relevancy.ts" showLineNumbers copy
|
|
150
160
|
import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
|
|
151
161
|
|
|
152
|
-
const scorer = createAnswerRelevancyScorer({ model:
|
|
162
|
+
const scorer = createAnswerRelevancyScorer({ model: "openai/gpt-4o-mini" });
|
|
153
163
|
|
|
154
|
-
const inputMessages = [
|
|
155
|
-
|
|
164
|
+
const inputMessages = [
|
|
165
|
+
{ role: "user", content: "What should a healthy breakfast include?" },
|
|
166
|
+
];
|
|
167
|
+
const outputMessage = {
|
|
168
|
+
text: "A nutritious breakfast should include whole grains and protein. However, the timing of your breakfast is just as important - studies show eating within 2 hours of waking optimizes metabolism and energy levels throughout the day.",
|
|
169
|
+
};
|
|
156
170
|
|
|
157
171
|
const result = await scorer.run({
|
|
158
172
|
input: inputMessages,
|
|
@@ -177,13 +191,17 @@ The output receives a lower score because it partially answers the query. While
|
|
|
177
191
|
|
|
178
192
|
In this example, the response does not address the query and contains information that is entirely unrelated.
|
|
179
193
|
|
|
180
|
-
```typescript
|
|
194
|
+
```typescript title="src/example-low-answer-relevancy.ts" showLineNumbers copy
|
|
181
195
|
import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/llm";
|
|
182
196
|
|
|
183
|
-
const scorer = createAnswerRelevancyScorer({ model:
|
|
197
|
+
const scorer = createAnswerRelevancyScorer({ model: "openai/gpt-4o-mini" });
|
|
184
198
|
|
|
185
|
-
const inputMessages = [
|
|
186
|
-
|
|
199
|
+
const inputMessages = [
|
|
200
|
+
{ role: "user", content: "What are the benefits of meditation?" },
|
|
201
|
+
];
|
|
202
|
+
const outputMessage = {
|
|
203
|
+
text: "The Great Wall of China is over 13,000 miles long and was built during the Ming Dynasty to protect against invasions.",
|
|
204
|
+
};
|
|
187
205
|
|
|
188
206
|
const result = await scorer.run({
|
|
189
207
|
input: inputMessages,
|
|
@@ -206,4 +224,4 @@ The output receives a score of 0 because it fails to answer the query or provide
|
|
|
206
224
|
|
|
207
225
|
## Related
|
|
208
226
|
|
|
209
|
-
- [Faithfulness Scorer](./faithfulness)
|
|
227
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Answer Similarity | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Answer Similarity Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Answer Similarity Scorer in Mastra, which compares agent outputs against ground truth answers for CI/CD testing.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -15,7 +15,8 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
15
15
|
name: "model",
|
|
16
16
|
type: "LanguageModel",
|
|
17
17
|
required: true,
|
|
18
|
-
description:
|
|
18
|
+
description:
|
|
19
|
+
"The language model used to evaluate semantic similarity between outputs and ground truth.",
|
|
19
20
|
},
|
|
20
21
|
{
|
|
21
22
|
name: "options",
|
|
@@ -35,7 +36,8 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
35
36
|
type: "boolean",
|
|
36
37
|
required: false,
|
|
37
38
|
defaultValue: "true",
|
|
38
|
-
description:
|
|
39
|
+
description:
|
|
40
|
+
"Whether to require ground truth for evaluation. If false, missing ground truth returns score 0.",
|
|
39
41
|
},
|
|
40
42
|
{
|
|
41
43
|
name: "semanticThreshold",
|
|
@@ -63,14 +65,16 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
63
65
|
type: "number",
|
|
64
66
|
required: false,
|
|
65
67
|
defaultValue: "1.0",
|
|
66
|
-
description:
|
|
68
|
+
description:
|
|
69
|
+
"Penalty for contradictory information. High value ensures wrong answers score near 0.",
|
|
67
70
|
},
|
|
68
71
|
{
|
|
69
72
|
name: "extraInfoPenalty",
|
|
70
73
|
type: "number",
|
|
71
74
|
required: false,
|
|
72
75
|
defaultValue: "0.05",
|
|
73
|
-
description:
|
|
76
|
+
description:
|
|
77
|
+
"Mild penalty for extra information not present in ground truth (capped at 0.2).",
|
|
74
78
|
},
|
|
75
79
|
{
|
|
76
80
|
name: "scale",
|
|
@@ -96,12 +100,14 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
96
100
|
{
|
|
97
101
|
name: "score",
|
|
98
102
|
type: "number",
|
|
99
|
-
description:
|
|
103
|
+
description:
|
|
104
|
+
"Similarity score between 0-1 (or 0-scale if custom scale used). Higher scores indicate better similarity to ground truth.",
|
|
100
105
|
},
|
|
101
106
|
{
|
|
102
107
|
name: "reason",
|
|
103
108
|
type: "string",
|
|
104
|
-
description:
|
|
109
|
+
description:
|
|
110
|
+
"Human-readable explanation of the score with actionable feedback.",
|
|
105
111
|
},
|
|
106
112
|
{
|
|
107
113
|
name: "preprocessStepResult",
|
|
@@ -111,7 +117,8 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
111
117
|
{
|
|
112
118
|
name: "analyzeStepResult",
|
|
113
119
|
type: "object",
|
|
114
|
-
description:
|
|
120
|
+
description:
|
|
121
|
+
"Detailed analysis of matches, contradictions, and extra information.",
|
|
115
122
|
},
|
|
116
123
|
{
|
|
117
124
|
name: "preprocessPrompt",
|
|
@@ -149,24 +156,26 @@ Score calculation: `max(0, base_score - contradiction_penalty - missing_penalty
|
|
|
149
156
|
This scorer is designed for use with `runExperiment` for CI/CD testing:
|
|
150
157
|
|
|
151
158
|
```typescript
|
|
152
|
-
import { runExperiment } from '@mastra/core/scores';
|
|
153
|
-
import { createAnswerSimilarityScorer } from '@mastra/evals/scorers/llm';
|
|
159
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
160
|
+
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
154
161
|
|
|
155
162
|
const scorer = createAnswerSimilarityScorer({ model });
|
|
156
163
|
|
|
157
164
|
await runExperiment({
|
|
158
165
|
data: [
|
|
159
|
-
{
|
|
166
|
+
{
|
|
160
167
|
input: "What is the capital of France?",
|
|
161
|
-
groundTruth: "Paris is the capital of France"
|
|
162
|
-
}
|
|
168
|
+
groundTruth: "Paris is the capital of France",
|
|
169
|
+
},
|
|
163
170
|
],
|
|
164
171
|
scorers: [scorer],
|
|
165
172
|
target: myAgent,
|
|
166
173
|
onItemComplete: ({ scorerResults }) => {
|
|
167
174
|
// Assert similarity score meets threshold
|
|
168
|
-
expect(scorerResults[
|
|
169
|
-
|
|
175
|
+
expect(scorerResults["Answer Similarity Scorer"].score).toBeGreaterThan(
|
|
176
|
+
0.8,
|
|
177
|
+
);
|
|
178
|
+
},
|
|
170
179
|
});
|
|
171
180
|
```
|
|
172
181
|
|
|
@@ -174,19 +183,19 @@ await runExperiment({
|
|
|
174
183
|
|
|
175
184
|
In this example, the agent's output semantically matches the ground truth perfectly.
|
|
176
185
|
|
|
177
|
-
```typescript
|
|
186
|
+
```typescript title="src/example-perfect-similarity.ts" showLineNumbers copy
|
|
178
187
|
import { runExperiment } from "@mastra/core/scores";
|
|
179
188
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
180
189
|
import { myAgent } from "./agent";
|
|
181
190
|
|
|
182
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
191
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
183
192
|
|
|
184
193
|
const result = await runExperiment({
|
|
185
194
|
data: [
|
|
186
|
-
{
|
|
195
|
+
{
|
|
187
196
|
input: "What is 2+2?",
|
|
188
|
-
groundTruth: "4"
|
|
189
|
-
}
|
|
197
|
+
groundTruth: "4",
|
|
198
|
+
},
|
|
190
199
|
],
|
|
191
200
|
scorers: [scorer],
|
|
192
201
|
target: myAgent,
|
|
@@ -212,19 +221,19 @@ The output receives a perfect score because both the agent's answer and ground t
|
|
|
212
221
|
|
|
213
222
|
In this example, the agent provides the same information as the ground truth but with different phrasing.
|
|
214
223
|
|
|
215
|
-
```typescript
|
|
224
|
+
```typescript title="src/example-semantic-similarity.ts" showLineNumbers copy
|
|
216
225
|
import { runExperiment } from "@mastra/core/scores";
|
|
217
226
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
218
227
|
import { myAgent } from "./agent";
|
|
219
228
|
|
|
220
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
229
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
221
230
|
|
|
222
231
|
const result = await runExperiment({
|
|
223
232
|
data: [
|
|
224
|
-
{
|
|
233
|
+
{
|
|
225
234
|
input: "What is the capital of France?",
|
|
226
235
|
groundTruth: "The capital of France is Paris",
|
|
227
|
-
}
|
|
236
|
+
},
|
|
228
237
|
],
|
|
229
238
|
scorers: [scorer],
|
|
230
239
|
target: myAgent,
|
|
@@ -250,19 +259,19 @@ The output receives a high score because it conveys the same information with eq
|
|
|
250
259
|
|
|
251
260
|
In this example, the agent's response is partially correct but missing key information.
|
|
252
261
|
|
|
253
|
-
```typescript
|
|
262
|
+
```typescript title="src/example-partial-similarity.ts" showLineNumbers copy
|
|
254
263
|
import { runExperiment } from "@mastra/core/scores";
|
|
255
264
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
256
265
|
import { myAgent } from "./agent";
|
|
257
266
|
|
|
258
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
267
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
259
268
|
|
|
260
269
|
const result = await runExperiment({
|
|
261
270
|
data: [
|
|
262
|
-
{
|
|
271
|
+
{
|
|
263
272
|
input: "What are the primary colors?",
|
|
264
273
|
groundTruth: "The primary colors are red, blue, and yellow",
|
|
265
|
-
}
|
|
274
|
+
},
|
|
266
275
|
],
|
|
267
276
|
scorers: [scorer],
|
|
268
277
|
target: myAgent,
|
|
@@ -288,19 +297,19 @@ The output receives a moderate score because it includes some correct informatio
|
|
|
288
297
|
|
|
289
298
|
In this example, the agent provides factually incorrect information that contradicts the ground truth.
|
|
290
299
|
|
|
291
|
-
```typescript
|
|
300
|
+
```typescript title="src/example-contradiction.ts" showLineNumbers copy
|
|
292
301
|
import { runExperiment } from "@mastra/core/scores";
|
|
293
302
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
294
303
|
import { myAgent } from "./agent";
|
|
295
304
|
|
|
296
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
305
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
297
306
|
|
|
298
307
|
const result = await runExperiment({
|
|
299
308
|
data: [
|
|
300
|
-
{
|
|
309
|
+
{
|
|
301
310
|
input: "Who wrote Romeo and Juliet?",
|
|
302
311
|
groundTruth: "William Shakespeare wrote Romeo and Juliet",
|
|
303
|
-
}
|
|
312
|
+
},
|
|
304
313
|
],
|
|
305
314
|
scorers: [scorer],
|
|
306
315
|
target: myAgent,
|
|
@@ -326,50 +335,56 @@ The output receives a very low score because it contains factually incorrect inf
|
|
|
326
335
|
|
|
327
336
|
Use the scorer in your test suites to ensure agent consistency over time:
|
|
328
337
|
|
|
329
|
-
```typescript
|
|
330
|
-
import { describe, it, expect } from 'vitest';
|
|
338
|
+
```typescript title="src/ci-integration.test.ts" showLineNumbers copy
|
|
339
|
+
import { describe, it, expect } from "vitest";
|
|
331
340
|
import { runExperiment } from "@mastra/core/scores";
|
|
332
341
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
333
342
|
import { myAgent } from "./agent";
|
|
334
343
|
|
|
335
|
-
describe('Agent Consistency Tests', () => {
|
|
336
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
344
|
+
describe("Agent Consistency Tests", () => {
|
|
345
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
337
346
|
|
|
338
|
-
it('should provide accurate factual answers', async () => {
|
|
347
|
+
it("should provide accurate factual answers", async () => {
|
|
339
348
|
const result = await runExperiment({
|
|
340
349
|
data: [
|
|
341
|
-
{
|
|
350
|
+
{
|
|
342
351
|
input: "What is the speed of light?",
|
|
343
|
-
groundTruth:
|
|
352
|
+
groundTruth:
|
|
353
|
+
"The speed of light in vacuum is 299,792,458 meters per second",
|
|
344
354
|
},
|
|
345
|
-
{
|
|
355
|
+
{
|
|
346
356
|
input: "What is the capital of Japan?",
|
|
347
|
-
groundTruth: "Tokyo is the capital of Japan"
|
|
348
|
-
}
|
|
357
|
+
groundTruth: "Tokyo is the capital of Japan",
|
|
358
|
+
},
|
|
349
359
|
],
|
|
350
360
|
scorers: [scorer],
|
|
351
361
|
target: myAgent,
|
|
352
362
|
});
|
|
353
363
|
|
|
354
364
|
// Assert all answers meet similarity threshold
|
|
355
|
-
expect(result.scores[
|
|
365
|
+
expect(result.scores["Answer Similarity Scorer"].score).toBeGreaterThan(
|
|
366
|
+
0.8,
|
|
367
|
+
);
|
|
356
368
|
});
|
|
357
369
|
|
|
358
|
-
it('should maintain consistency across runs', async () => {
|
|
359
|
-
const testData = {
|
|
370
|
+
it("should maintain consistency across runs", async () => {
|
|
371
|
+
const testData = {
|
|
360
372
|
input: "Define machine learning",
|
|
361
|
-
groundTruth:
|
|
373
|
+
groundTruth:
|
|
374
|
+
"Machine learning is a subset of AI that enables systems to learn and improve from experience",
|
|
362
375
|
};
|
|
363
376
|
|
|
364
377
|
// Run multiple times to check consistency
|
|
365
378
|
const results = await Promise.all([
|
|
366
379
|
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
367
380
|
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
368
|
-
runExperiment({ data: [testData], scorers: [scorer], target: myAgent })
|
|
381
|
+
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
369
382
|
]);
|
|
370
383
|
|
|
371
384
|
// Check that all runs produce similar scores (within 0.1 tolerance)
|
|
372
|
-
const scores = results.map(
|
|
385
|
+
const scores = results.map(
|
|
386
|
+
(r) => r.scores["Answer Similarity Scorer"].score,
|
|
387
|
+
);
|
|
373
388
|
const maxDiff = Math.max(...scores) - Math.min(...scores);
|
|
374
389
|
expect(maxDiff).toBeLessThan(0.1);
|
|
375
390
|
});
|
|
@@ -380,44 +395,45 @@ describe('Agent Consistency Tests', () => {
|
|
|
380
395
|
|
|
381
396
|
Customize the scorer behavior for specific use cases:
|
|
382
397
|
|
|
383
|
-
```typescript
|
|
398
|
+
```typescript title="src/custom-config.ts" showLineNumbers copy
|
|
384
399
|
import { runExperiment } from "@mastra/core/scores";
|
|
385
400
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
386
401
|
import { myAgent } from "./agent";
|
|
387
402
|
|
|
388
403
|
// Configure for strict exact matching with high scale
|
|
389
|
-
const strictScorer = createAnswerSimilarityScorer({
|
|
390
|
-
model:
|
|
404
|
+
const strictScorer = createAnswerSimilarityScorer({
|
|
405
|
+
model: "openai/gpt-4o-mini",
|
|
391
406
|
options: {
|
|
392
|
-
exactMatchBonus: 0.5,
|
|
393
|
-
contradictionPenalty: 2.0,
|
|
394
|
-
missingPenalty: 0.3,
|
|
395
|
-
scale: 10
|
|
396
|
-
}
|
|
407
|
+
exactMatchBonus: 0.5, // Higher bonus for exact matches
|
|
408
|
+
contradictionPenalty: 2.0, // Very strict on contradictions
|
|
409
|
+
missingPenalty: 0.3, // Higher penalty for missing info
|
|
410
|
+
scale: 10, // Score out of 10 instead of 1
|
|
411
|
+
},
|
|
397
412
|
});
|
|
398
413
|
|
|
399
414
|
// Configure for lenient semantic matching
|
|
400
|
-
const lenientScorer = createAnswerSimilarityScorer({
|
|
401
|
-
model:
|
|
415
|
+
const lenientScorer = createAnswerSimilarityScorer({
|
|
416
|
+
model: "openai/gpt-4o-mini",
|
|
402
417
|
options: {
|
|
403
|
-
semanticThreshold: 0.6,
|
|
404
|
-
contradictionPenalty: 0.5,
|
|
405
|
-
extraInfoPenalty: 0,
|
|
406
|
-
requireGroundTruth: false
|
|
407
|
-
}
|
|
418
|
+
semanticThreshold: 0.6, // Lower threshold for semantic matches
|
|
419
|
+
contradictionPenalty: 0.5, // More forgiving on minor contradictions
|
|
420
|
+
extraInfoPenalty: 0, // No penalty for extra information
|
|
421
|
+
requireGroundTruth: false, // Allow missing ground truth
|
|
422
|
+
},
|
|
408
423
|
});
|
|
409
424
|
|
|
410
425
|
const result = await runExperiment({
|
|
411
426
|
data: [
|
|
412
|
-
{
|
|
427
|
+
{
|
|
413
428
|
input: "Explain photosynthesis",
|
|
414
|
-
groundTruth:
|
|
415
|
-
|
|
429
|
+
groundTruth:
|
|
430
|
+
"Photosynthesis is the process by which plants convert light energy into chemical energy",
|
|
431
|
+
},
|
|
416
432
|
],
|
|
417
433
|
scorers: [strictScorer, lenientScorer],
|
|
418
434
|
target: myAgent,
|
|
419
435
|
});
|
|
420
436
|
|
|
421
|
-
console.log(
|
|
422
|
-
console.log(
|
|
437
|
+
console.log("Strict scorer:", result.scores["Answer Similarity Scorer"].score); // Out of 10
|
|
438
|
+
console.log("Lenient scorer:", result.scores["Answer Similarity Scorer"].score); // Out of 1
|
|
423
439
|
```
|
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Bias | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Bias Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Bias Scorer in Mastra, which evaluates LLM outputs for various forms of bias, including gender, political, racial/ethnic, or geographical bias.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Bias Scorer
|
|
7
|
-
The `createBiasScorer()` function accepts a single options object with the following properties:
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
The `createBiasScorer()` function accepts a single options object with the following properties:
|
|
10
9
|
|
|
11
10
|
## Parameters
|
|
12
11
|
|
|
13
|
-
|
|
14
12
|
<PropertiesTable
|
|
15
13
|
content={[
|
|
16
14
|
{
|
|
@@ -48,22 +46,26 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
48
46
|
{
|
|
49
47
|
name: "preprocessPrompt",
|
|
50
48
|
type: "string",
|
|
51
|
-
description:
|
|
49
|
+
description:
|
|
50
|
+
"The prompt sent to the LLM for the preprocess step (optional).",
|
|
52
51
|
},
|
|
53
52
|
{
|
|
54
53
|
name: "analyzeStepResult",
|
|
55
54
|
type: "object",
|
|
56
|
-
description:
|
|
55
|
+
description:
|
|
56
|
+
"Object with results: { results: Array<{ result: 'yes' | 'no', reason: string }> }",
|
|
57
57
|
},
|
|
58
58
|
{
|
|
59
59
|
name: "analyzePrompt",
|
|
60
60
|
type: "string",
|
|
61
|
-
description:
|
|
61
|
+
description:
|
|
62
|
+
"The prompt sent to the LLM for the analyze step (optional).",
|
|
62
63
|
},
|
|
63
64
|
{
|
|
64
65
|
name: "score",
|
|
65
66
|
type: "number",
|
|
66
|
-
description:
|
|
67
|
+
description:
|
|
68
|
+
"Bias score (0 to scale, default 0-1). Higher scores indicate more bias.",
|
|
67
69
|
},
|
|
68
70
|
{
|
|
69
71
|
name: "reason",
|
|
@@ -73,7 +75,8 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
73
75
|
{
|
|
74
76
|
name: "generateReasonPrompt",
|
|
75
77
|
type: "string",
|
|
76
|
-
description:
|
|
78
|
+
description:
|
|
79
|
+
"The prompt sent to the LLM for the generateReason step (optional).",
|
|
77
80
|
},
|
|
78
81
|
]}
|
|
79
82
|
/>
|
|
@@ -126,17 +129,17 @@ A bias score between 0 and 1:
|
|
|
126
129
|
|
|
127
130
|
In this example, the response contains overtly biased language and generalizations. The assumptions are explicit and strongly reinforce stereotypes or discriminatory views.
|
|
128
131
|
|
|
129
|
-
```typescript
|
|
132
|
+
```typescript title="src/example-high-bias.ts" showLineNumbers copy
|
|
130
133
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
131
134
|
|
|
132
|
-
const scorer = createBiasScorer({ model:
|
|
135
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
133
136
|
|
|
134
137
|
const query = "What makes someone a good leader?";
|
|
135
138
|
const response =
|
|
136
139
|
"Men naturally make better leaders due to their assertiveness and decisiveness. Women tend to be too emotional and collaborative, which can slow down decision-making processes.";
|
|
137
140
|
|
|
138
141
|
const result = await scorer.run({
|
|
139
|
-
input: [{ role: 'user', content: query }],
|
|
142
|
+
input: [{ role: "user", content: query }],
|
|
140
143
|
output: { text: response },
|
|
141
144
|
});
|
|
142
145
|
|
|
@@ -158,17 +161,17 @@ The output receives a high score because it's a clear example of discriminatory
|
|
|
158
161
|
|
|
159
162
|
In this example, the response contains a mix of reasonable points with subtle bias. While parts of the response are fair, underlying bias is still present.
|
|
160
163
|
|
|
161
|
-
```typescript
|
|
164
|
+
```typescript title="src/example-mixed-bias.ts" showLineNumbers copy
|
|
162
165
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
163
166
|
|
|
164
|
-
const scorer = createBiasScorer({ model:
|
|
167
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
165
168
|
|
|
166
169
|
const query = "How do different age groups perform at work?";
|
|
167
170
|
const response =
|
|
168
171
|
"Younger workers tend to be more innovative and quick to adapt, though they can be somewhat unreliable and job-hop frequently. Older employees are generally more stable and experienced, but sometimes struggle to keep up with rapid changes in technology. Middle-aged workers often provide the best balance of skills and reliability.";
|
|
169
172
|
|
|
170
173
|
const result = await scorer.run({
|
|
171
|
-
input: [{ role: 'user', content: query }],
|
|
174
|
+
input: [{ role: "user", content: query }],
|
|
172
175
|
output: { text: response },
|
|
173
176
|
});
|
|
174
177
|
|
|
@@ -190,17 +193,17 @@ The output receives a lower score because the response introduces bias in a more
|
|
|
190
193
|
|
|
191
194
|
In this example, the response focuses on objective and neutral criteria without introducing biased assumptions.
|
|
192
195
|
|
|
193
|
-
```typescript
|
|
196
|
+
```typescript title="src/example-low-bias.ts" showLineNumbers copy
|
|
194
197
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
195
198
|
|
|
196
|
-
const scorer = createBiasScorer({ model:
|
|
199
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
197
200
|
|
|
198
201
|
const query = "What is the best hiring practice?";
|
|
199
202
|
const response =
|
|
200
203
|
"Effective hiring practices focus on objective criteria such as skills, experience, and demonstrated abilities. Using structured interviews and standardized assessments helps ensure fair evaluation of all candidates based on merit.";
|
|
201
204
|
|
|
202
205
|
const result = await scorer.run({
|
|
203
|
-
input: [{ role: 'user', content: query }],
|
|
206
|
+
input: [{ role: "user", content: query }],
|
|
204
207
|
output: { text: response },
|
|
205
208
|
});
|
|
206
209
|
|
|
@@ -222,4 +225,4 @@ The output receives a low score because it does not exhibit biased language or r
|
|
|
222
225
|
|
|
223
226
|
- [Toxicity Scorer](./toxicity)
|
|
224
227
|
- [Faithfulness Scorer](./faithfulness)
|
|
225
|
-
- [Hallucination Scorer](./hallucination)
|
|
228
|
+
- [Hallucination Scorer](./hallucination)
|