@mastra/mcp-docs-server 0.13.37 → 0.13.39-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +8 -8
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fauth.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +38 -38
- package/.docs/organized/changelogs/%40mastra%2Fcloud.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +88 -88
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +59 -59
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Ffastembed.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Flance.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +48 -48
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Frag.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Freact.md +24 -24
- package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +40 -40
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +11 -1
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +10 -10
- package/.docs/organized/changelogs/create-mastra.md +11 -11
- package/.docs/organized/changelogs/mastra.md +34 -34
- package/.docs/raw/agents/adding-voice.mdx +20 -9
- package/.docs/raw/agents/agent-memory.mdx +55 -39
- package/.docs/raw/agents/guardrails.mdx +68 -61
- package/.docs/raw/agents/networks.mdx +50 -46
- package/.docs/raw/agents/overview.mdx +126 -89
- package/.docs/raw/agents/using-tools.mdx +14 -15
- package/.docs/raw/auth/auth0.mdx +28 -27
- package/.docs/raw/auth/clerk.mdx +22 -20
- package/.docs/raw/auth/firebase.mdx +42 -39
- package/.docs/raw/auth/index.mdx +1 -1
- package/.docs/raw/auth/jwt.mdx +18 -16
- package/.docs/raw/auth/supabase.mdx +20 -18
- package/.docs/raw/auth/workos.mdx +32 -26
- package/.docs/raw/community/contributing-templates.mdx +7 -7
- package/.docs/raw/community/discord.mdx +2 -2
- package/.docs/raw/community/licensing.mdx +1 -1
- package/.docs/raw/course/02-agent-tools-mcp/04-initializing-mcp-tools.md +2 -2
- package/.docs/raw/course/03-agent-memory/18-advanced-configuration-semantic-recall.md +1 -1
- package/.docs/raw/course/03-agent-memory/26-updating-mastra-export-comprehensive.md +0 -32
- package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +41 -22
- package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +48 -29
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +52 -24
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +86 -55
- package/.docs/raw/deployment/cloud-providers/index.mdx +16 -13
- package/.docs/raw/deployment/monorepo.mdx +32 -42
- package/.docs/raw/deployment/overview.mdx +15 -15
- package/.docs/raw/deployment/server-deployment.mdx +18 -23
- package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +16 -17
- package/.docs/raw/deployment/serverless-platforms/index.mdx +15 -12
- package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +14 -23
- package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +16 -23
- package/.docs/raw/deployment/web-framework.mdx +14 -14
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +130 -121
- package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +54 -43
- package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +51 -36
- package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +161 -120
- package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +60 -49
- package/.docs/raw/frameworks/servers/express.mdx +46 -45
- package/.docs/raw/frameworks/web-frameworks/astro.mdx +245 -162
- package/.docs/raw/frameworks/web-frameworks/next-js.mdx +112 -69
- package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +171 -130
- package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +94 -81
- package/.docs/raw/getting-started/installation.mdx +165 -134
- package/.docs/raw/getting-started/mcp-docs-server.mdx +36 -39
- package/.docs/raw/getting-started/project-structure.mdx +34 -42
- package/.docs/raw/getting-started/studio.mdx +42 -50
- package/.docs/raw/getting-started/templates.mdx +18 -15
- package/.docs/raw/index.mdx +9 -9
- package/.docs/raw/mastra-cloud/dashboard.mdx +12 -12
- package/.docs/raw/mastra-cloud/observability.mdx +9 -10
- package/.docs/raw/mastra-cloud/overview.mdx +17 -23
- package/.docs/raw/mastra-cloud/setting-up.mdx +33 -20
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/overview.mdx +22 -24
- package/.docs/raw/memory/semantic-recall.mdx +16 -17
- package/.docs/raw/memory/storage/memory-with-libsql.mdx +27 -28
- package/.docs/raw/memory/storage/memory-with-pg.mdx +26 -26
- package/.docs/raw/memory/storage/memory-with-upstash.mdx +26 -27
- package/.docs/raw/memory/threads-and-resources.mdx +24 -21
- package/.docs/raw/memory/working-memory.mdx +27 -38
- package/.docs/raw/observability/ai-tracing/exporters/arize.mdx +30 -29
- package/.docs/raw/observability/ai-tracing/exporters/braintrust.mdx +8 -9
- package/.docs/raw/observability/ai-tracing/exporters/cloud.mdx +17 -16
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +37 -38
- package/.docs/raw/observability/ai-tracing/exporters/langfuse.mdx +18 -17
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +14 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +56 -46
- package/.docs/raw/observability/ai-tracing/overview.mdx +151 -128
- package/.docs/raw/observability/ai-tracing/processors/sensitive-data-filter.mdx +57 -36
- package/.docs/raw/observability/logging.mdx +14 -17
- package/.docs/raw/observability/nextjs-tracing.mdx +5 -5
- package/.docs/raw/observability/otel-tracing.mdx +17 -18
- package/.docs/raw/observability/overview.mdx +15 -11
- package/.docs/raw/rag/chunking-and-embedding.mdx +10 -8
- package/.docs/raw/rag/overview.mdx +5 -5
- package/.docs/raw/rag/retrieval.mdx +125 -107
- package/.docs/raw/rag/vector-databases.mdx +232 -223
- package/.docs/raw/reference/agents/agent.mdx +56 -43
- package/.docs/raw/reference/agents/generate.mdx +367 -261
- package/.docs/raw/reference/agents/generateLegacy.mdx +260 -178
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDescription.mdx +3 -2
- package/.docs/raw/reference/agents/getInstructions.mdx +7 -5
- package/.docs/raw/reference/agents/getLLM.mdx +11 -7
- package/.docs/raw/reference/agents/getMemory.mdx +7 -5
- package/.docs/raw/reference/agents/getModel.mdx +7 -5
- package/.docs/raw/reference/agents/getScorers.mdx +7 -5
- package/.docs/raw/reference/agents/getTools.mdx +7 -5
- package/.docs/raw/reference/agents/getVoice.mdx +7 -5
- package/.docs/raw/reference/agents/getWorkflows.mdx +7 -5
- package/.docs/raw/reference/agents/listAgents.mdx +6 -4
- package/.docs/raw/reference/agents/listScorers.mdx +69 -0
- package/.docs/raw/reference/agents/listTools.mdx +69 -0
- package/.docs/raw/reference/agents/listWorkflows.mdx +69 -0
- package/.docs/raw/reference/agents/network.mdx +171 -116
- package/.docs/raw/reference/auth/auth0.mdx +18 -14
- package/.docs/raw/reference/auth/clerk.mdx +15 -12
- package/.docs/raw/reference/auth/firebase.mdx +23 -16
- package/.docs/raw/reference/auth/jwt.mdx +7 -6
- package/.docs/raw/reference/auth/supabase.mdx +13 -10
- package/.docs/raw/reference/auth/workos.mdx +17 -13
- package/.docs/raw/reference/cli/create-mastra.mdx +61 -44
- package/.docs/raw/reference/cli/mastra.mdx +11 -11
- package/.docs/raw/reference/client-js/agents.mdx +41 -44
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -1
- package/.docs/raw/reference/client-js/logs.mdx +2 -2
- package/.docs/raw/reference/client-js/mastra-client.mdx +28 -20
- package/.docs/raw/reference/client-js/memory.mdx +7 -3
- package/.docs/raw/reference/client-js/observability.mdx +10 -9
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -1
- package/.docs/raw/reference/client-js/tools.mdx +2 -2
- package/.docs/raw/reference/client-js/vectors.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows.mdx +8 -8
- package/.docs/raw/reference/core/getAgent.mdx +6 -5
- package/.docs/raw/reference/core/getAgentById.mdx +5 -4
- package/.docs/raw/reference/core/getAgents.mdx +3 -3
- package/.docs/raw/reference/core/getDeployer.mdx +4 -3
- package/.docs/raw/reference/core/getLogger.mdx +4 -3
- package/.docs/raw/reference/core/getLogs.mdx +10 -6
- package/.docs/raw/reference/core/getLogsByRunId.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServer.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServers.mdx +4 -3
- package/.docs/raw/reference/core/getMemory.mdx +4 -3
- package/.docs/raw/reference/core/getScorer.mdx +15 -12
- package/.docs/raw/reference/core/getScorerByName.mdx +13 -10
- package/.docs/raw/reference/core/getScorers.mdx +3 -2
- package/.docs/raw/reference/core/getServer.mdx +4 -3
- package/.docs/raw/reference/core/getStorage.mdx +4 -3
- package/.docs/raw/reference/core/getTelemetry.mdx +4 -3
- package/.docs/raw/reference/core/getVector.mdx +6 -4
- package/.docs/raw/reference/core/getVectors.mdx +4 -3
- package/.docs/raw/reference/core/getWorkflow.mdx +7 -4
- package/.docs/raw/reference/core/getWorkflows.mdx +5 -3
- package/.docs/raw/reference/core/listAgents.mdx +35 -0
- package/.docs/raw/reference/core/listLogs.mdx +96 -0
- package/.docs/raw/reference/core/listLogsByRunId.mdx +87 -0
- package/.docs/raw/reference/core/listScorers.mdx +43 -0
- package/.docs/raw/reference/core/listWorkflows.mdx +45 -0
- package/.docs/raw/reference/core/mastra-class.mdx +16 -16
- package/.docs/raw/reference/core/setLogger.mdx +6 -4
- package/.docs/raw/reference/core/setStorage.mdx +4 -4
- package/.docs/raw/reference/core/setTelemetry.mdx +4 -3
- package/.docs/raw/reference/deployer/cloudflare.mdx +11 -7
- package/.docs/raw/reference/deployer/deployer.mdx +2 -1
- package/.docs/raw/reference/deployer/netlify.mdx +4 -4
- package/.docs/raw/reference/deployer/vercel.mdx +6 -6
- package/.docs/raw/reference/evals/answer-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/bias.mdx +4 -5
- package/.docs/raw/reference/evals/completeness.mdx +4 -5
- package/.docs/raw/reference/evals/content-similarity.mdx +4 -5
- package/.docs/raw/reference/evals/context-position.mdx +4 -5
- package/.docs/raw/reference/evals/context-precision.mdx +4 -5
- package/.docs/raw/reference/evals/context-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/contextual-recall.mdx +4 -5
- package/.docs/raw/reference/evals/faithfulness.mdx +4 -5
- package/.docs/raw/reference/evals/hallucination.mdx +4 -6
- package/.docs/raw/reference/evals/keyword-coverage.mdx +4 -5
- package/.docs/raw/reference/evals/prompt-alignment.mdx +4 -6
- package/.docs/raw/reference/evals/summarization.mdx +4 -6
- package/.docs/raw/reference/evals/textual-difference.mdx +4 -5
- package/.docs/raw/reference/evals/tone-consistency.mdx +4 -7
- package/.docs/raw/reference/evals/toxicity.mdx +4 -5
- package/.docs/raw/reference/index.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/after.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/commit.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/createRun.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/else.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/events.mdx +7 -7
- package/.docs/raw/reference/legacyWorkflows/execute.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/if.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resume.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/start.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-class.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-function.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/step-options.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/suspend.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/then.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/until.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/watch.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/while.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/workflow.mdx +7 -7
- package/.docs/raw/reference/memory/createThread.mdx +10 -10
- package/.docs/raw/reference/memory/deleteMessages.mdx +7 -5
- package/.docs/raw/reference/memory/getThreadById.mdx +6 -5
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +11 -10
- package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +10 -9
- package/.docs/raw/reference/memory/{Memory.mdx → memory-class.mdx} +55 -47
- package/.docs/raw/reference/memory/query.mdx +39 -25
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +9 -6
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +26 -22
- package/.docs/raw/reference/observability/ai-tracing/exporters/arize.mdx +16 -12
- package/.docs/raw/reference/observability/ai-tracing/exporters/braintrust.mdx +14 -14
- package/.docs/raw/reference/observability/ai-tracing/exporters/cloud-exporter.mdx +16 -11
- package/.docs/raw/reference/observability/ai-tracing/exporters/console-exporter.mdx +10 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/default-exporter.mdx +23 -15
- package/.docs/raw/reference/observability/ai-tracing/exporters/langfuse.mdx +6 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +13 -13
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +44 -36
- package/.docs/raw/reference/observability/ai-tracing/interfaces.mdx +116 -107
- package/.docs/raw/reference/observability/ai-tracing/processors/sensitive-data-filter.mdx +37 -32
- package/.docs/raw/reference/observability/ai-tracing/span.mdx +29 -26
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +13 -15
- package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +2 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +12 -14
- package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +1 -1
- package/.docs/raw/reference/processors/batch-parts-processor.mdx +3 -4
- package/.docs/raw/reference/processors/language-detector.mdx +6 -8
- package/.docs/raw/reference/processors/moderation-processor.mdx +36 -11
- package/.docs/raw/reference/processors/pii-detector.mdx +34 -9
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +6 -8
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +14 -10
- package/.docs/raw/reference/processors/token-limiter-processor.mdx +3 -5
- package/.docs/raw/reference/processors/unicode-normalizer.mdx +3 -4
- package/.docs/raw/reference/rag/chunk.mdx +41 -23
- package/.docs/raw/reference/rag/database-config.mdx +56 -38
- package/.docs/raw/reference/rag/embeddings.mdx +1 -1
- package/.docs/raw/reference/rag/extract-params.mdx +1 -1
- package/.docs/raw/reference/rag/graph-rag.mdx +1 -1
- package/.docs/raw/reference/rag/metadata-filters.mdx +23 -26
- package/.docs/raw/reference/rag/rerank.mdx +1 -1
- package/.docs/raw/reference/rag/rerankWithScorer.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +35 -17
- package/.docs/raw/reference/scorers/answer-similarity.mdx +84 -68
- package/.docs/raw/reference/scorers/bias.mdx +22 -19
- package/.docs/raw/reference/scorers/completeness.mdx +21 -16
- package/.docs/raw/reference/scorers/content-similarity.mdx +12 -10
- package/.docs/raw/reference/scorers/context-precision.mdx +73 -64
- package/.docs/raw/reference/scorers/context-relevance.mdx +142 -126
- package/.docs/raw/reference/scorers/create-scorer.mdx +93 -61
- package/.docs/raw/reference/scorers/faithfulness.mdx +21 -13
- package/.docs/raw/reference/scorers/hallucination.mdx +17 -12
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +32 -27
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +39 -33
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +203 -152
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +186 -132
- package/.docs/raw/reference/scorers/run-experiment.mdx +41 -32
- package/.docs/raw/reference/scorers/textual-difference.mdx +25 -26
- package/.docs/raw/reference/scorers/tone-consistency.mdx +29 -26
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +180 -176
- package/.docs/raw/reference/scorers/toxicity.mdx +35 -31
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
- package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
- package/.docs/raw/reference/storage/dynamodb.mdx +1 -1
- package/.docs/raw/reference/storage/lance.mdx +1 -1
- package/.docs/raw/reference/storage/libsql.mdx +2 -2
- package/.docs/raw/reference/storage/mongodb.mdx +4 -5
- package/.docs/raw/reference/storage/mssql.mdx +8 -7
- package/.docs/raw/reference/storage/postgresql.mdx +35 -33
- package/.docs/raw/reference/storage/upstash.mdx +6 -5
- package/.docs/raw/reference/streaming/ChunkType.mdx +788 -314
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +265 -109
- package/.docs/raw/reference/streaming/agents/stream.mdx +375 -266
- package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +233 -162
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +4 -4
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +5 -5
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +25 -21
- package/.docs/raw/reference/streaming/workflows/stream.mdx +36 -27
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +37 -28
- package/.docs/raw/reference/templates/overview.mdx +16 -39
- package/.docs/raw/reference/tools/client.mdx +1 -1
- package/.docs/raw/reference/tools/create-tool.mdx +45 -35
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +2 -2
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +12 -12
- package/.docs/raw/reference/tools/mcp-client.mdx +78 -72
- package/.docs/raw/reference/tools/mcp-server.mdx +91 -78
- package/.docs/raw/reference/tools/vector-query-tool.mdx +48 -38
- package/.docs/raw/reference/vectors/astra.mdx +1 -1
- package/.docs/raw/reference/vectors/chroma.mdx +25 -19
- package/.docs/raw/reference/vectors/couchbase.mdx +4 -4
- package/.docs/raw/reference/vectors/lance.mdx +5 -6
- package/.docs/raw/reference/vectors/libsql.mdx +1 -1
- package/.docs/raw/reference/vectors/mongodb.mdx +1 -1
- package/.docs/raw/reference/vectors/opensearch.mdx +1 -1
- package/.docs/raw/reference/vectors/pg.mdx +8 -4
- package/.docs/raw/reference/vectors/pinecone.mdx +1 -1
- package/.docs/raw/reference/vectors/qdrant.mdx +1 -1
- package/.docs/raw/reference/vectors/s3vectors.mdx +35 -27
- package/.docs/raw/reference/vectors/turbopuffer.mdx +1 -1
- package/.docs/raw/reference/vectors/upstash.mdx +33 -25
- package/.docs/raw/reference/vectors/vectorize.mdx +1 -1
- package/.docs/raw/reference/voice/azure.mdx +1 -1
- package/.docs/raw/reference/voice/cloudflare.mdx +1 -1
- package/.docs/raw/reference/voice/composite-voice.mdx +1 -1
- package/.docs/raw/reference/voice/deepgram.mdx +1 -1
- package/.docs/raw/reference/voice/elevenlabs.mdx +1 -1
- package/.docs/raw/reference/voice/google-gemini-live.mdx +7 -5
- package/.docs/raw/reference/voice/google.mdx +1 -1
- package/.docs/raw/reference/voice/mastra-voice.mdx +1 -1
- package/.docs/raw/reference/voice/murf.mdx +1 -1
- package/.docs/raw/reference/voice/openai-realtime.mdx +1 -1
- package/.docs/raw/reference/voice/openai.mdx +1 -1
- package/.docs/raw/reference/voice/playai.mdx +1 -1
- package/.docs/raw/reference/voice/sarvam.mdx +1 -1
- package/.docs/raw/reference/voice/speechify.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addInstructions.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
- package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
- package/.docs/raw/reference/voice/voice.close.mdx +1 -1
- package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
- package/.docs/raw/reference/voice/voice.events.mdx +1 -1
- package/.docs/raw/reference/voice/voice.getSpeakers.mdx +23 -30
- package/.docs/raw/reference/voice/voice.listen.mdx +1 -1
- package/.docs/raw/reference/voice/voice.off.mdx +1 -1
- package/.docs/raw/reference/voice/voice.on.mdx +1 -1
- package/.docs/raw/reference/voice/voice.send.mdx +1 -1
- package/.docs/raw/reference/voice/voice.speak.mdx +1 -1
- package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
- package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -3
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +49 -34
- package/.docs/raw/reference/workflows/run-methods/start.mdx +43 -31
- package/.docs/raw/reference/workflows/run-methods/watch.mdx +7 -8
- package/.docs/raw/reference/workflows/run.mdx +7 -10
- package/.docs/raw/reference/workflows/step.mdx +16 -13
- package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -7
- package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/map.mdx +74 -3
- package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +5 -5
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +3 -3
- package/.docs/raw/reference/workflows/workflow.mdx +12 -23
- package/.docs/raw/scorers/custom-scorers.mdx +58 -48
- package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +6 -8
- package/.docs/raw/scorers/evals-old-api/overview.mdx +8 -8
- package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +9 -9
- package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +5 -5
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +3 -1
- package/.docs/raw/scorers/overview.mdx +23 -22
- package/.docs/raw/server-db/custom-api-routes.mdx +8 -8
- package/.docs/raw/server-db/mastra-client.mdx +56 -54
- package/.docs/raw/server-db/middleware.mdx +15 -11
- package/.docs/raw/server-db/production-server.mdx +5 -7
- package/.docs/raw/server-db/runtime-context.mdx +41 -38
- package/.docs/raw/server-db/storage.mdx +82 -87
- package/.docs/raw/streaming/events.mdx +24 -16
- package/.docs/raw/streaming/overview.mdx +17 -18
- package/.docs/raw/streaming/tool-streaming.mdx +9 -10
- package/.docs/raw/streaming/workflow-streaming.mdx +14 -12
- package/.docs/raw/tools-mcp/advanced-usage.mdx +2 -2
- package/.docs/raw/tools-mcp/mcp-overview.mdx +92 -102
- package/.docs/raw/tools-mcp/overview.mdx +10 -15
- package/.docs/raw/voice/overview.mdx +273 -250
- package/.docs/raw/voice/speech-to-speech.mdx +14 -12
- package/.docs/raw/voice/speech-to-text.mdx +2 -2
- package/.docs/raw/voice/text-to-speech.mdx +2 -2
- package/.docs/raw/workflows/agents-and-tools.mdx +27 -30
- package/.docs/raw/workflows/control-flow.mdx +213 -170
- package/.docs/raw/workflows/error-handling.mdx +15 -17
- package/.docs/raw/workflows/human-in-the-loop.mdx +39 -39
- package/.docs/raw/workflows/inngest-workflow.mdx +35 -31
- package/.docs/raw/workflows/overview.mdx +108 -56
- package/.docs/raw/workflows/snapshots.mdx +54 -36
- package/.docs/raw/workflows/suspend-and-resume.mdx +52 -65
- package/.docs/raw/workflows-legacy/control-flow.mdx +15 -17
- package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +3 -1
- package/.docs/raw/workflows-legacy/error-handling.mdx +8 -6
- package/.docs/raw/workflows-legacy/nested-workflows.mdx +6 -0
- package/.docs/raw/workflows-legacy/overview.mdx +28 -26
- package/.docs/raw/workflows-legacy/runtime-variables.mdx +4 -2
- package/.docs/raw/workflows-legacy/steps.mdx +5 -3
- package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +10 -8
- package/.docs/raw/workflows-legacy/variables.mdx +10 -8
- package/CHANGELOG.md +24 -0
- package/package.json +5 -5
- package/.docs/raw/memory/storage/memory-with-mongodb.mdx +0 -148
- package/.docs/raw/workflows/input-data-mapping.mdx +0 -107
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Answer Similarity | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Answer Similarity Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Answer Similarity Scorer in Mastra, which compares agent outputs against ground truth answers for CI/CD testing.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -15,7 +15,8 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
15
15
|
name: "model",
|
|
16
16
|
type: "LanguageModel",
|
|
17
17
|
required: true,
|
|
18
|
-
description:
|
|
18
|
+
description:
|
|
19
|
+
"The language model used to evaluate semantic similarity between outputs and ground truth.",
|
|
19
20
|
},
|
|
20
21
|
{
|
|
21
22
|
name: "options",
|
|
@@ -35,7 +36,8 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
35
36
|
type: "boolean",
|
|
36
37
|
required: false,
|
|
37
38
|
defaultValue: "true",
|
|
38
|
-
description:
|
|
39
|
+
description:
|
|
40
|
+
"Whether to require ground truth for evaluation. If false, missing ground truth returns score 0.",
|
|
39
41
|
},
|
|
40
42
|
{
|
|
41
43
|
name: "semanticThreshold",
|
|
@@ -63,14 +65,16 @@ The `createAnswerSimilarityScorer()` function creates a scorer that evaluates ho
|
|
|
63
65
|
type: "number",
|
|
64
66
|
required: false,
|
|
65
67
|
defaultValue: "1.0",
|
|
66
|
-
description:
|
|
68
|
+
description:
|
|
69
|
+
"Penalty for contradictory information. High value ensures wrong answers score near 0.",
|
|
67
70
|
},
|
|
68
71
|
{
|
|
69
72
|
name: "extraInfoPenalty",
|
|
70
73
|
type: "number",
|
|
71
74
|
required: false,
|
|
72
75
|
defaultValue: "0.05",
|
|
73
|
-
description:
|
|
76
|
+
description:
|
|
77
|
+
"Mild penalty for extra information not present in ground truth (capped at 0.2).",
|
|
74
78
|
},
|
|
75
79
|
{
|
|
76
80
|
name: "scale",
|
|
@@ -96,12 +100,14 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
96
100
|
{
|
|
97
101
|
name: "score",
|
|
98
102
|
type: "number",
|
|
99
|
-
description:
|
|
103
|
+
description:
|
|
104
|
+
"Similarity score between 0-1 (or 0-scale if custom scale used). Higher scores indicate better similarity to ground truth.",
|
|
100
105
|
},
|
|
101
106
|
{
|
|
102
107
|
name: "reason",
|
|
103
108
|
type: "string",
|
|
104
|
-
description:
|
|
109
|
+
description:
|
|
110
|
+
"Human-readable explanation of the score with actionable feedback.",
|
|
105
111
|
},
|
|
106
112
|
{
|
|
107
113
|
name: "preprocessStepResult",
|
|
@@ -111,7 +117,8 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
111
117
|
{
|
|
112
118
|
name: "analyzeStepResult",
|
|
113
119
|
type: "object",
|
|
114
|
-
description:
|
|
120
|
+
description:
|
|
121
|
+
"Detailed analysis of matches, contradictions, and extra information.",
|
|
115
122
|
},
|
|
116
123
|
{
|
|
117
124
|
name: "preprocessPrompt",
|
|
@@ -149,24 +156,26 @@ Score calculation: `max(0, base_score - contradiction_penalty - missing_penalty
|
|
|
149
156
|
This scorer is designed for use with `runExperiment` for CI/CD testing:
|
|
150
157
|
|
|
151
158
|
```typescript
|
|
152
|
-
import { runExperiment } from
|
|
153
|
-
import { createAnswerSimilarityScorer } from
|
|
159
|
+
import { runExperiment } from "@mastra/core/scores";
|
|
160
|
+
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
154
161
|
|
|
155
162
|
const scorer = createAnswerSimilarityScorer({ model });
|
|
156
163
|
|
|
157
164
|
await runExperiment({
|
|
158
165
|
data: [
|
|
159
|
-
{
|
|
166
|
+
{
|
|
160
167
|
input: "What is the capital of France?",
|
|
161
|
-
groundTruth: "Paris is the capital of France"
|
|
162
|
-
}
|
|
168
|
+
groundTruth: "Paris is the capital of France",
|
|
169
|
+
},
|
|
163
170
|
],
|
|
164
171
|
scorers: [scorer],
|
|
165
172
|
target: myAgent,
|
|
166
173
|
onItemComplete: ({ scorerResults }) => {
|
|
167
174
|
// Assert similarity score meets threshold
|
|
168
|
-
expect(scorerResults[
|
|
169
|
-
|
|
175
|
+
expect(scorerResults["Answer Similarity Scorer"].score).toBeGreaterThan(
|
|
176
|
+
0.8,
|
|
177
|
+
);
|
|
178
|
+
},
|
|
170
179
|
});
|
|
171
180
|
```
|
|
172
181
|
|
|
@@ -174,19 +183,19 @@ await runExperiment({
|
|
|
174
183
|
|
|
175
184
|
In this example, the agent's output semantically matches the ground truth perfectly.
|
|
176
185
|
|
|
177
|
-
```typescript
|
|
186
|
+
```typescript title="src/example-perfect-similarity.ts" showLineNumbers copy
|
|
178
187
|
import { runExperiment } from "@mastra/core/scores";
|
|
179
188
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
180
189
|
import { myAgent } from "./agent";
|
|
181
190
|
|
|
182
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
191
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
183
192
|
|
|
184
193
|
const result = await runExperiment({
|
|
185
194
|
data: [
|
|
186
|
-
{
|
|
195
|
+
{
|
|
187
196
|
input: "What is 2+2?",
|
|
188
|
-
groundTruth: "4"
|
|
189
|
-
}
|
|
197
|
+
groundTruth: "4",
|
|
198
|
+
},
|
|
190
199
|
],
|
|
191
200
|
scorers: [scorer],
|
|
192
201
|
target: myAgent,
|
|
@@ -212,19 +221,19 @@ The output receives a perfect score because both the agent's answer and ground t
|
|
|
212
221
|
|
|
213
222
|
In this example, the agent provides the same information as the ground truth but with different phrasing.
|
|
214
223
|
|
|
215
|
-
```typescript
|
|
224
|
+
```typescript title="src/example-semantic-similarity.ts" showLineNumbers copy
|
|
216
225
|
import { runExperiment } from "@mastra/core/scores";
|
|
217
226
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
218
227
|
import { myAgent } from "./agent";
|
|
219
228
|
|
|
220
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
229
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
221
230
|
|
|
222
231
|
const result = await runExperiment({
|
|
223
232
|
data: [
|
|
224
|
-
{
|
|
233
|
+
{
|
|
225
234
|
input: "What is the capital of France?",
|
|
226
235
|
groundTruth: "The capital of France is Paris",
|
|
227
|
-
}
|
|
236
|
+
},
|
|
228
237
|
],
|
|
229
238
|
scorers: [scorer],
|
|
230
239
|
target: myAgent,
|
|
@@ -250,19 +259,19 @@ The output receives a high score because it conveys the same information with eq
|
|
|
250
259
|
|
|
251
260
|
In this example, the agent's response is partially correct but missing key information.
|
|
252
261
|
|
|
253
|
-
```typescript
|
|
262
|
+
```typescript title="src/example-partial-similarity.ts" showLineNumbers copy
|
|
254
263
|
import { runExperiment } from "@mastra/core/scores";
|
|
255
264
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
256
265
|
import { myAgent } from "./agent";
|
|
257
266
|
|
|
258
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
267
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
259
268
|
|
|
260
269
|
const result = await runExperiment({
|
|
261
270
|
data: [
|
|
262
|
-
{
|
|
271
|
+
{
|
|
263
272
|
input: "What are the primary colors?",
|
|
264
273
|
groundTruth: "The primary colors are red, blue, and yellow",
|
|
265
|
-
}
|
|
274
|
+
},
|
|
266
275
|
],
|
|
267
276
|
scorers: [scorer],
|
|
268
277
|
target: myAgent,
|
|
@@ -288,19 +297,19 @@ The output receives a moderate score because it includes some correct informatio
|
|
|
288
297
|
|
|
289
298
|
In this example, the agent provides factually incorrect information that contradicts the ground truth.
|
|
290
299
|
|
|
291
|
-
```typescript
|
|
300
|
+
```typescript title="src/example-contradiction.ts" showLineNumbers copy
|
|
292
301
|
import { runExperiment } from "@mastra/core/scores";
|
|
293
302
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
294
303
|
import { myAgent } from "./agent";
|
|
295
304
|
|
|
296
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
305
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
297
306
|
|
|
298
307
|
const result = await runExperiment({
|
|
299
308
|
data: [
|
|
300
|
-
{
|
|
309
|
+
{
|
|
301
310
|
input: "Who wrote Romeo and Juliet?",
|
|
302
311
|
groundTruth: "William Shakespeare wrote Romeo and Juliet",
|
|
303
|
-
}
|
|
312
|
+
},
|
|
304
313
|
],
|
|
305
314
|
scorers: [scorer],
|
|
306
315
|
target: myAgent,
|
|
@@ -326,50 +335,56 @@ The output receives a very low score because it contains factually incorrect inf
|
|
|
326
335
|
|
|
327
336
|
Use the scorer in your test suites to ensure agent consistency over time:
|
|
328
337
|
|
|
329
|
-
```typescript
|
|
330
|
-
import { describe, it, expect } from
|
|
338
|
+
```typescript title="src/ci-integration.test.ts" showLineNumbers copy
|
|
339
|
+
import { describe, it, expect } from "vitest";
|
|
331
340
|
import { runExperiment } from "@mastra/core/scores";
|
|
332
341
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
333
342
|
import { myAgent } from "./agent";
|
|
334
343
|
|
|
335
|
-
describe(
|
|
336
|
-
const scorer = createAnswerSimilarityScorer({ model:
|
|
344
|
+
describe("Agent Consistency Tests", () => {
|
|
345
|
+
const scorer = createAnswerSimilarityScorer({ model: "openai/gpt-4o-mini" });
|
|
337
346
|
|
|
338
|
-
it(
|
|
347
|
+
it("should provide accurate factual answers", async () => {
|
|
339
348
|
const result = await runExperiment({
|
|
340
349
|
data: [
|
|
341
|
-
{
|
|
350
|
+
{
|
|
342
351
|
input: "What is the speed of light?",
|
|
343
|
-
groundTruth:
|
|
352
|
+
groundTruth:
|
|
353
|
+
"The speed of light in vacuum is 299,792,458 meters per second",
|
|
344
354
|
},
|
|
345
|
-
{
|
|
355
|
+
{
|
|
346
356
|
input: "What is the capital of Japan?",
|
|
347
|
-
groundTruth: "Tokyo is the capital of Japan"
|
|
348
|
-
}
|
|
357
|
+
groundTruth: "Tokyo is the capital of Japan",
|
|
358
|
+
},
|
|
349
359
|
],
|
|
350
360
|
scorers: [scorer],
|
|
351
361
|
target: myAgent,
|
|
352
362
|
});
|
|
353
363
|
|
|
354
364
|
// Assert all answers meet similarity threshold
|
|
355
|
-
expect(result.scores[
|
|
365
|
+
expect(result.scores["Answer Similarity Scorer"].score).toBeGreaterThan(
|
|
366
|
+
0.8,
|
|
367
|
+
);
|
|
356
368
|
});
|
|
357
369
|
|
|
358
|
-
it(
|
|
359
|
-
const testData = {
|
|
370
|
+
it("should maintain consistency across runs", async () => {
|
|
371
|
+
const testData = {
|
|
360
372
|
input: "Define machine learning",
|
|
361
|
-
groundTruth:
|
|
373
|
+
groundTruth:
|
|
374
|
+
"Machine learning is a subset of AI that enables systems to learn and improve from experience",
|
|
362
375
|
};
|
|
363
376
|
|
|
364
377
|
// Run multiple times to check consistency
|
|
365
378
|
const results = await Promise.all([
|
|
366
379
|
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
367
380
|
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
368
|
-
runExperiment({ data: [testData], scorers: [scorer], target: myAgent })
|
|
381
|
+
runExperiment({ data: [testData], scorers: [scorer], target: myAgent }),
|
|
369
382
|
]);
|
|
370
383
|
|
|
371
384
|
// Check that all runs produce similar scores (within 0.1 tolerance)
|
|
372
|
-
const scores = results.map(
|
|
385
|
+
const scores = results.map(
|
|
386
|
+
(r) => r.scores["Answer Similarity Scorer"].score,
|
|
387
|
+
);
|
|
373
388
|
const maxDiff = Math.max(...scores) - Math.min(...scores);
|
|
374
389
|
expect(maxDiff).toBeLessThan(0.1);
|
|
375
390
|
});
|
|
@@ -380,44 +395,45 @@ describe('Agent Consistency Tests', () => {
|
|
|
380
395
|
|
|
381
396
|
Customize the scorer behavior for specific use cases:
|
|
382
397
|
|
|
383
|
-
```typescript
|
|
398
|
+
```typescript title="src/custom-config.ts" showLineNumbers copy
|
|
384
399
|
import { runExperiment } from "@mastra/core/scores";
|
|
385
400
|
import { createAnswerSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
386
401
|
import { myAgent } from "./agent";
|
|
387
402
|
|
|
388
403
|
// Configure for strict exact matching with high scale
|
|
389
|
-
const strictScorer = createAnswerSimilarityScorer({
|
|
390
|
-
model:
|
|
404
|
+
const strictScorer = createAnswerSimilarityScorer({
|
|
405
|
+
model: "openai/gpt-4o-mini",
|
|
391
406
|
options: {
|
|
392
|
-
exactMatchBonus: 0.5,
|
|
393
|
-
contradictionPenalty: 2.0,
|
|
394
|
-
missingPenalty: 0.3,
|
|
395
|
-
scale: 10
|
|
396
|
-
}
|
|
407
|
+
exactMatchBonus: 0.5, // Higher bonus for exact matches
|
|
408
|
+
contradictionPenalty: 2.0, // Very strict on contradictions
|
|
409
|
+
missingPenalty: 0.3, // Higher penalty for missing info
|
|
410
|
+
scale: 10, // Score out of 10 instead of 1
|
|
411
|
+
},
|
|
397
412
|
});
|
|
398
413
|
|
|
399
414
|
// Configure for lenient semantic matching
|
|
400
|
-
const lenientScorer = createAnswerSimilarityScorer({
|
|
401
|
-
model:
|
|
415
|
+
const lenientScorer = createAnswerSimilarityScorer({
|
|
416
|
+
model: "openai/gpt-4o-mini",
|
|
402
417
|
options: {
|
|
403
|
-
semanticThreshold: 0.6,
|
|
404
|
-
contradictionPenalty: 0.5,
|
|
405
|
-
extraInfoPenalty: 0,
|
|
406
|
-
requireGroundTruth: false
|
|
407
|
-
}
|
|
418
|
+
semanticThreshold: 0.6, // Lower threshold for semantic matches
|
|
419
|
+
contradictionPenalty: 0.5, // More forgiving on minor contradictions
|
|
420
|
+
extraInfoPenalty: 0, // No penalty for extra information
|
|
421
|
+
requireGroundTruth: false, // Allow missing ground truth
|
|
422
|
+
},
|
|
408
423
|
});
|
|
409
424
|
|
|
410
425
|
const result = await runExperiment({
|
|
411
426
|
data: [
|
|
412
|
-
{
|
|
427
|
+
{
|
|
413
428
|
input: "Explain photosynthesis",
|
|
414
|
-
groundTruth:
|
|
415
|
-
|
|
429
|
+
groundTruth:
|
|
430
|
+
"Photosynthesis is the process by which plants convert light energy into chemical energy",
|
|
431
|
+
},
|
|
416
432
|
],
|
|
417
433
|
scorers: [strictScorer, lenientScorer],
|
|
418
434
|
target: myAgent,
|
|
419
435
|
});
|
|
420
436
|
|
|
421
|
-
console.log(
|
|
422
|
-
console.log(
|
|
437
|
+
console.log("Strict scorer:", result.scores["Answer Similarity Scorer"].score); // Out of 10
|
|
438
|
+
console.log("Lenient scorer:", result.scores["Answer Similarity Scorer"].score); // Out of 1
|
|
423
439
|
```
|
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Bias | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Bias Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Bias Scorer in Mastra, which evaluates LLM outputs for various forms of bias, including gender, political, racial/ethnic, or geographical bias.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Bias Scorer
|
|
7
|
-
The `createBiasScorer()` function accepts a single options object with the following properties:
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
The `createBiasScorer()` function accepts a single options object with the following properties:
|
|
10
9
|
|
|
11
10
|
## Parameters
|
|
12
11
|
|
|
13
|
-
|
|
14
12
|
<PropertiesTable
|
|
15
13
|
content={[
|
|
16
14
|
{
|
|
@@ -48,22 +46,26 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
48
46
|
{
|
|
49
47
|
name: "preprocessPrompt",
|
|
50
48
|
type: "string",
|
|
51
|
-
description:
|
|
49
|
+
description:
|
|
50
|
+
"The prompt sent to the LLM for the preprocess step (optional).",
|
|
52
51
|
},
|
|
53
52
|
{
|
|
54
53
|
name: "analyzeStepResult",
|
|
55
54
|
type: "object",
|
|
56
|
-
description:
|
|
55
|
+
description:
|
|
56
|
+
"Object with results: { results: Array<{ result: 'yes' | 'no', reason: string }> }",
|
|
57
57
|
},
|
|
58
58
|
{
|
|
59
59
|
name: "analyzePrompt",
|
|
60
60
|
type: "string",
|
|
61
|
-
description:
|
|
61
|
+
description:
|
|
62
|
+
"The prompt sent to the LLM for the analyze step (optional).",
|
|
62
63
|
},
|
|
63
64
|
{
|
|
64
65
|
name: "score",
|
|
65
66
|
type: "number",
|
|
66
|
-
description:
|
|
67
|
+
description:
|
|
68
|
+
"Bias score (0 to scale, default 0-1). Higher scores indicate more bias.",
|
|
67
69
|
},
|
|
68
70
|
{
|
|
69
71
|
name: "reason",
|
|
@@ -73,7 +75,8 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
73
75
|
{
|
|
74
76
|
name: "generateReasonPrompt",
|
|
75
77
|
type: "string",
|
|
76
|
-
description:
|
|
78
|
+
description:
|
|
79
|
+
"The prompt sent to the LLM for the generateReason step (optional).",
|
|
77
80
|
},
|
|
78
81
|
]}
|
|
79
82
|
/>
|
|
@@ -126,17 +129,17 @@ A bias score between 0 and 1:
|
|
|
126
129
|
|
|
127
130
|
In this example, the response contains overtly biased language and generalizations. The assumptions are explicit and strongly reinforce stereotypes or discriminatory views.
|
|
128
131
|
|
|
129
|
-
```typescript
|
|
132
|
+
```typescript title="src/example-high-bias.ts" showLineNumbers copy
|
|
130
133
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
131
134
|
|
|
132
|
-
const scorer = createBiasScorer({ model:
|
|
135
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
133
136
|
|
|
134
137
|
const query = "What makes someone a good leader?";
|
|
135
138
|
const response =
|
|
136
139
|
"Men naturally make better leaders due to their assertiveness and decisiveness. Women tend to be too emotional and collaborative, which can slow down decision-making processes.";
|
|
137
140
|
|
|
138
141
|
const result = await scorer.run({
|
|
139
|
-
input: [{ role:
|
|
142
|
+
input: [{ role: "user", content: query }],
|
|
140
143
|
output: { text: response },
|
|
141
144
|
});
|
|
142
145
|
|
|
@@ -158,17 +161,17 @@ The output receives a high score because it's a clear example of discriminatory
|
|
|
158
161
|
|
|
159
162
|
In this example, the response contains a mix of reasonable points with subtle bias. While parts of the response are fair, underlying bias is still present.
|
|
160
163
|
|
|
161
|
-
```typescript
|
|
164
|
+
```typescript title="src/example-mixed-bias.ts" showLineNumbers copy
|
|
162
165
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
163
166
|
|
|
164
|
-
const scorer = createBiasScorer({ model:
|
|
167
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
165
168
|
|
|
166
169
|
const query = "How do different age groups perform at work?";
|
|
167
170
|
const response =
|
|
168
171
|
"Younger workers tend to be more innovative and quick to adapt, though they can be somewhat unreliable and job-hop frequently. Older employees are generally more stable and experienced, but sometimes struggle to keep up with rapid changes in technology. Middle-aged workers often provide the best balance of skills and reliability.";
|
|
169
172
|
|
|
170
173
|
const result = await scorer.run({
|
|
171
|
-
input: [{ role:
|
|
174
|
+
input: [{ role: "user", content: query }],
|
|
172
175
|
output: { text: response },
|
|
173
176
|
});
|
|
174
177
|
|
|
@@ -190,17 +193,17 @@ The output receives a lower score because the response introduces bias in a more
|
|
|
190
193
|
|
|
191
194
|
In this example, the response focuses on objective and neutral criteria without introducing biased assumptions.
|
|
192
195
|
|
|
193
|
-
```typescript
|
|
196
|
+
```typescript title="src/example-low-bias.ts" showLineNumbers copy
|
|
194
197
|
import { createBiasScorer } from "@mastra/evals/scorers/llm";
|
|
195
198
|
|
|
196
|
-
const scorer = createBiasScorer({ model:
|
|
199
|
+
const scorer = createBiasScorer({ model: "openai/gpt-4o-mini" });
|
|
197
200
|
|
|
198
201
|
const query = "What is the best hiring practice?";
|
|
199
202
|
const response =
|
|
200
203
|
"Effective hiring practices focus on objective criteria such as skills, experience, and demonstrated abilities. Using structured interviews and standardized assessments helps ensure fair evaluation of all candidates based on merit.";
|
|
201
204
|
|
|
202
205
|
const result = await scorer.run({
|
|
203
|
-
input: [{ role:
|
|
206
|
+
input: [{ role: "user", content: query }],
|
|
204
207
|
output: { text: response },
|
|
205
208
|
});
|
|
206
209
|
|
|
@@ -222,4 +225,4 @@ The output receives a low score because it does not exhibit biased language or r
|
|
|
222
225
|
|
|
223
226
|
- [Toxicity Scorer](./toxicity)
|
|
224
227
|
- [Faithfulness Scorer](./faithfulness)
|
|
225
|
-
- [Hallucination Scorer](./hallucination)
|
|
228
|
+
- [Hallucination Scorer](./hallucination)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Completeness | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Completeness Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Completeness Scorer in Mastra, which evaluates how thoroughly LLM outputs cover key elements present in the input.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -25,12 +25,14 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
25
25
|
{
|
|
26
26
|
name: "preprocessStepResult",
|
|
27
27
|
type: "object",
|
|
28
|
-
description:
|
|
28
|
+
description:
|
|
29
|
+
"Object with extracted elements and coverage details: { inputElements: string[], outputElements: string[], missingElements: string[], elementCounts: { input: number, output: number } }",
|
|
29
30
|
},
|
|
30
31
|
{
|
|
31
32
|
name: "score",
|
|
32
33
|
type: "number",
|
|
33
|
-
description:
|
|
34
|
+
description:
|
|
35
|
+
"Completeness score (0-1) representing the proportion of input elements covered in the output.",
|
|
34
36
|
},
|
|
35
37
|
]}
|
|
36
38
|
/>
|
|
@@ -109,17 +111,18 @@ A completeness score between 0 and 1:
|
|
|
109
111
|
|
|
110
112
|
In this example, the response comprehensively addresses all aspects of the query with detailed information covering multiple dimensions.
|
|
111
113
|
|
|
112
|
-
```typescript
|
|
114
|
+
```typescript title="src/example-high-completeness.ts" showLineNumbers copy
|
|
113
115
|
import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
|
|
114
116
|
|
|
115
|
-
const scorer = createCompletenessScorer({ model:
|
|
117
|
+
const scorer = createCompletenessScorer({ model: "openai/gpt-4o-mini" });
|
|
116
118
|
|
|
117
|
-
const query =
|
|
119
|
+
const query =
|
|
120
|
+
"Explain the process of photosynthesis, including the inputs, outputs, and stages involved.";
|
|
118
121
|
const response =
|
|
119
122
|
"Photosynthesis is the process by which plants convert sunlight into chemical energy. Inputs: Carbon dioxide (CO2) from the air enters through stomata, water (H2O) is absorbed by roots, and sunlight provides energy captured by chlorophyll. The process occurs in two main stages: 1) Light-dependent reactions in the thylakoids convert light energy to ATP and NADPH while splitting water and releasing oxygen. 2) Light-independent reactions (Calvin cycle) in the stroma use ATP, NADPH, and CO2 to produce glucose. Outputs: Glucose (C6H12O6) serves as food for the plant, and oxygen (O2) is released as a byproduct. The overall equation is: 6CO2 + 6H2O + light energy → C6H12O6 + 6O2.";
|
|
120
123
|
|
|
121
124
|
const result = await scorer.run({
|
|
122
|
-
input: [{ role:
|
|
125
|
+
input: [{ role: "user", content: query }],
|
|
123
126
|
output: { text: response },
|
|
124
127
|
});
|
|
125
128
|
|
|
@@ -141,17 +144,18 @@ The output receives a high score because it addresses all requested aspects: inp
|
|
|
141
144
|
|
|
142
145
|
In this example, the response addresses some key points but misses important aspects or lacks sufficient detail.
|
|
143
146
|
|
|
144
|
-
```typescript
|
|
147
|
+
```typescript title="src/example-partial-completeness.ts" showLineNumbers copy
|
|
145
148
|
import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
|
|
146
149
|
|
|
147
|
-
const scorer = createCompletenessScorer({ model:
|
|
150
|
+
const scorer = createCompletenessScorer({ model: "openai/gpt-4o-mini" });
|
|
148
151
|
|
|
149
|
-
const query =
|
|
152
|
+
const query =
|
|
153
|
+
"What are the benefits and drawbacks of remote work for both employees and employers?";
|
|
150
154
|
const response =
|
|
151
155
|
"Remote work offers several benefits for employees including flexible schedules, no commuting time, and better work-life balance. It also reduces costs for office space and utilities for employers. However, remote work can lead to isolation and communication challenges for employees.";
|
|
152
156
|
|
|
153
157
|
const result = await scorer.run({
|
|
154
|
-
input: [{ role:
|
|
158
|
+
input: [{ role: "user", content: query }],
|
|
155
159
|
output: { text: response },
|
|
156
160
|
});
|
|
157
161
|
|
|
@@ -173,17 +177,18 @@ The output receives a moderate score because it covers employee benefits and som
|
|
|
173
177
|
|
|
174
178
|
In this example, the response only partially addresses the query and misses several important aspects.
|
|
175
179
|
|
|
176
|
-
```typescript
|
|
180
|
+
```typescript title="src/example-low-completeness.ts" showLineNumbers copy
|
|
177
181
|
import { createCompletenessScorer } from "@mastra/evals/scorers/llm";
|
|
178
182
|
|
|
179
|
-
const scorer = createCompletenessScorer({ model:
|
|
183
|
+
const scorer = createCompletenessScorer({ model: "openai/gpt-4o-mini" });
|
|
180
184
|
|
|
181
|
-
const query =
|
|
185
|
+
const query =
|
|
186
|
+
"Compare renewable and non-renewable energy sources in terms of cost, environmental impact, and sustainability.";
|
|
182
187
|
const response =
|
|
183
188
|
"Renewable energy sources like solar and wind are becoming cheaper. They're better for the environment than fossil fuels.";
|
|
184
189
|
|
|
185
190
|
const result = await scorer.run({
|
|
186
|
-
input: [{ role:
|
|
191
|
+
input: [{ role: "user", content: query }],
|
|
187
192
|
output: { text: response },
|
|
188
193
|
});
|
|
189
194
|
|
|
@@ -206,4 +211,4 @@ The output receives a low score because it only briefly mentions cost and enviro
|
|
|
206
211
|
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
207
212
|
- [Content Similarity Scorer](./content-similarity)
|
|
208
213
|
- [Textual Difference Scorer](./textual-difference)
|
|
209
|
-
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
214
|
+
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Content Similarity | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Content Similarity Scorer | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Content Similarity Scorer in Mastra, which measures textual similarity between strings and provides a matching score.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -44,7 +44,8 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
44
44
|
{
|
|
45
45
|
name: "preprocessStepResult",
|
|
46
46
|
type: "object",
|
|
47
|
-
description:
|
|
47
|
+
description:
|
|
48
|
+
"Object with processed input and output: { processedInput: string, processedOutput: string }",
|
|
48
49
|
},
|
|
49
50
|
{
|
|
50
51
|
name: "analyzeStepResult",
|
|
@@ -54,7 +55,8 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
54
55
|
{
|
|
55
56
|
name: "score",
|
|
56
57
|
type: "number",
|
|
57
|
-
description:
|
|
58
|
+
description:
|
|
59
|
+
"Similarity score (0-1) where 1 indicates perfect similarity.",
|
|
58
60
|
},
|
|
59
61
|
]}
|
|
60
62
|
/>
|
|
@@ -82,7 +84,7 @@ Final score: `similarity_value * scale`
|
|
|
82
84
|
|
|
83
85
|
In this example, the response closely resembles the query in both structure and meaning. Minor differences in tense and phrasing do not significantly affect the overall similarity.
|
|
84
86
|
|
|
85
|
-
```typescript
|
|
87
|
+
```typescript title="src/example-high-similarity.ts" showLineNumbers copy
|
|
86
88
|
import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
87
89
|
|
|
88
90
|
const scorer = createContentSimilarityScorer();
|
|
@@ -91,7 +93,7 @@ const query = "The quick brown fox jumps over the lazy dog.";
|
|
|
91
93
|
const response = "A quick brown fox jumped over a lazy dog.";
|
|
92
94
|
|
|
93
95
|
const result = await scorer.run({
|
|
94
|
-
input: [{ role:
|
|
96
|
+
input: [{ role: "user", content: query }],
|
|
95
97
|
output: { text: response },
|
|
96
98
|
});
|
|
97
99
|
|
|
@@ -115,7 +117,7 @@ The output receives a high score because the response preserves the intent and c
|
|
|
115
117
|
|
|
116
118
|
In this example, the response shares some conceptual overlap with the query but diverges in structure and wording. Key elements remain present, but the phrasing introduces moderate variation.
|
|
117
119
|
|
|
118
|
-
```typescript
|
|
120
|
+
```typescript title="src/example-moderate-similarity.ts" showLineNumbers copy
|
|
119
121
|
import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
120
122
|
|
|
121
123
|
const scorer = createContentSimilarityScorer();
|
|
@@ -124,7 +126,7 @@ const query = "A brown fox quickly leaps across a sleeping dog.";
|
|
|
124
126
|
const response = "The quick brown fox jumps over the lazy dog.";
|
|
125
127
|
|
|
126
128
|
const result = await scorer.run({
|
|
127
|
-
input: [{ role:
|
|
129
|
+
input: [{ role: "user", content: query }],
|
|
128
130
|
output: { text: response },
|
|
129
131
|
});
|
|
130
132
|
|
|
@@ -148,7 +150,7 @@ The output receives a mid-range score because the response captures the general
|
|
|
148
150
|
|
|
149
151
|
In this example, the response and query are unrelated in meaning, despite having a similar grammatical structure. There is little to no shared content overlap.
|
|
150
152
|
|
|
151
|
-
```typescript
|
|
153
|
+
```typescript title="src/example-low-similarity.ts" showLineNumbers copy
|
|
152
154
|
import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
|
|
153
155
|
|
|
154
156
|
const scorer = createContentSimilarityScorer();
|
|
@@ -157,7 +159,7 @@ const query = "The cat sleeps on the windowsill.";
|
|
|
157
159
|
const response = "The quick brown fox jumps over the lazy dog.";
|
|
158
160
|
|
|
159
161
|
const result = await scorer.run({
|
|
160
|
-
input: [{ role:
|
|
162
|
+
input: [{ role: "user", content: query }],
|
|
161
163
|
output: { text: response },
|
|
162
164
|
});
|
|
163
165
|
|
|
@@ -192,4 +194,4 @@ A similarity score between 0 and 1:
|
|
|
192
194
|
- [Completeness Scorer](./completeness)
|
|
193
195
|
- [Textual Difference Scorer](./textual-difference)
|
|
194
196
|
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
195
|
-
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
197
|
+
- [Keyword Coverage Scorer](./keyword-coverage)
|