@mastra/mcp-docs-server 0.13.37 → 0.13.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +8 -8
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +69 -69
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Freact.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +23 -23
- package/.docs/organized/changelogs/create-mastra.md +5 -5
- package/.docs/organized/changelogs/mastra.md +17 -17
- package/.docs/raw/agents/adding-voice.mdx +20 -9
- package/.docs/raw/agents/agent-memory.mdx +55 -39
- package/.docs/raw/agents/guardrails.mdx +68 -61
- package/.docs/raw/agents/networks.mdx +50 -46
- package/.docs/raw/agents/overview.mdx +125 -88
- package/.docs/raw/agents/using-tools.mdx +14 -15
- package/.docs/raw/auth/auth0.mdx +28 -27
- package/.docs/raw/auth/clerk.mdx +22 -20
- package/.docs/raw/auth/firebase.mdx +42 -39
- package/.docs/raw/auth/index.mdx +1 -1
- package/.docs/raw/auth/jwt.mdx +18 -16
- package/.docs/raw/auth/supabase.mdx +20 -18
- package/.docs/raw/auth/workos.mdx +32 -26
- package/.docs/raw/community/contributing-templates.mdx +7 -7
- package/.docs/raw/community/discord.mdx +2 -2
- package/.docs/raw/community/licensing.mdx +1 -1
- package/.docs/raw/course/03-agent-memory/26-updating-mastra-export-comprehensive.md +0 -32
- package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +41 -22
- package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +48 -29
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +52 -24
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +86 -55
- package/.docs/raw/deployment/cloud-providers/index.mdx +16 -13
- package/.docs/raw/deployment/monorepo.mdx +32 -42
- package/.docs/raw/deployment/overview.mdx +15 -15
- package/.docs/raw/deployment/server-deployment.mdx +18 -23
- package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +16 -17
- package/.docs/raw/deployment/serverless-platforms/index.mdx +15 -12
- package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +14 -23
- package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +16 -23
- package/.docs/raw/deployment/web-framework.mdx +14 -14
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +138 -145
- package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +54 -43
- package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +51 -36
- package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +161 -120
- package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +59 -48
- package/.docs/raw/frameworks/servers/express.mdx +45 -44
- package/.docs/raw/frameworks/web-frameworks/astro.mdx +245 -162
- package/.docs/raw/frameworks/web-frameworks/next-js.mdx +112 -69
- package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +189 -164
- package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +94 -81
- package/.docs/raw/getting-started/installation.mdx +164 -133
- package/.docs/raw/getting-started/mcp-docs-server.mdx +36 -39
- package/.docs/raw/getting-started/project-structure.mdx +34 -42
- package/.docs/raw/getting-started/studio.mdx +40 -58
- package/.docs/raw/getting-started/templates.mdx +22 -27
- package/.docs/raw/index.mdx +9 -9
- package/.docs/raw/mastra-cloud/dashboard.mdx +10 -10
- package/.docs/raw/mastra-cloud/observability.mdx +7 -8
- package/.docs/raw/mastra-cloud/overview.mdx +16 -22
- package/.docs/raw/mastra-cloud/setting-up.mdx +33 -20
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/overview.mdx +21 -23
- package/.docs/raw/memory/semantic-recall.mdx +14 -13
- package/.docs/raw/memory/storage/memory-with-libsql.mdx +27 -28
- package/.docs/raw/memory/storage/memory-with-pg.mdx +26 -26
- package/.docs/raw/memory/storage/memory-with-upstash.mdx +26 -27
- package/.docs/raw/memory/threads-and-resources.mdx +23 -20
- package/.docs/raw/memory/working-memory.mdx +27 -38
- package/.docs/raw/observability/ai-tracing/exporters/arize.mdx +30 -29
- package/.docs/raw/observability/ai-tracing/exporters/braintrust.mdx +8 -9
- package/.docs/raw/observability/ai-tracing/exporters/cloud.mdx +17 -16
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +31 -32
- package/.docs/raw/observability/ai-tracing/exporters/langfuse.mdx +18 -17
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +14 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +56 -46
- package/.docs/raw/observability/ai-tracing/overview.mdx +145 -122
- package/.docs/raw/observability/ai-tracing/processors/sensitive-data-filter.mdx +57 -36
- package/.docs/raw/observability/logging.mdx +14 -17
- package/.docs/raw/observability/nextjs-tracing.mdx +5 -5
- package/.docs/raw/observability/otel-tracing.mdx +17 -18
- package/.docs/raw/observability/overview.mdx +14 -10
- package/.docs/raw/rag/chunking-and-embedding.mdx +10 -8
- package/.docs/raw/rag/overview.mdx +5 -5
- package/.docs/raw/rag/retrieval.mdx +125 -107
- package/.docs/raw/rag/vector-databases.mdx +232 -223
- package/.docs/raw/reference/agents/agent.mdx +54 -41
- package/.docs/raw/reference/agents/generate.mdx +367 -261
- package/.docs/raw/reference/agents/generateLegacy.mdx +260 -178
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDescription.mdx +3 -2
- package/.docs/raw/reference/agents/getInstructions.mdx +7 -5
- package/.docs/raw/reference/agents/getLLM.mdx +11 -7
- package/.docs/raw/reference/agents/getMemory.mdx +7 -5
- package/.docs/raw/reference/agents/getModel.mdx +7 -5
- package/.docs/raw/reference/agents/getScorers.mdx +7 -5
- package/.docs/raw/reference/agents/getTools.mdx +7 -5
- package/.docs/raw/reference/agents/getVoice.mdx +7 -5
- package/.docs/raw/reference/agents/getWorkflows.mdx +7 -5
- package/.docs/raw/reference/agents/listAgents.mdx +6 -4
- package/.docs/raw/reference/agents/network.mdx +171 -116
- package/.docs/raw/reference/auth/auth0.mdx +18 -14
- package/.docs/raw/reference/auth/clerk.mdx +15 -12
- package/.docs/raw/reference/auth/firebase.mdx +23 -16
- package/.docs/raw/reference/auth/jwt.mdx +7 -6
- package/.docs/raw/reference/auth/supabase.mdx +13 -10
- package/.docs/raw/reference/auth/workos.mdx +17 -13
- package/.docs/raw/reference/cli/create-mastra.mdx +61 -44
- package/.docs/raw/reference/cli/mastra.mdx +11 -11
- package/.docs/raw/reference/client-js/agents.mdx +40 -43
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -1
- package/.docs/raw/reference/client-js/logs.mdx +1 -1
- package/.docs/raw/reference/client-js/mastra-client.mdx +21 -13
- package/.docs/raw/reference/client-js/memory.mdx +6 -2
- package/.docs/raw/reference/client-js/observability.mdx +10 -9
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -1
- package/.docs/raw/reference/client-js/tools.mdx +1 -1
- package/.docs/raw/reference/client-js/vectors.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows.mdx +7 -7
- package/.docs/raw/reference/core/getAgent.mdx +6 -5
- package/.docs/raw/reference/core/getAgentById.mdx +5 -4
- package/.docs/raw/reference/core/getAgents.mdx +3 -3
- package/.docs/raw/reference/core/getDeployer.mdx +4 -3
- package/.docs/raw/reference/core/getLogger.mdx +4 -3
- package/.docs/raw/reference/core/getLogs.mdx +10 -6
- package/.docs/raw/reference/core/getLogsByRunId.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServer.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServers.mdx +4 -3
- package/.docs/raw/reference/core/getMemory.mdx +4 -3
- package/.docs/raw/reference/core/getScorer.mdx +15 -12
- package/.docs/raw/reference/core/getScorerByName.mdx +12 -9
- package/.docs/raw/reference/core/getScorers.mdx +3 -2
- package/.docs/raw/reference/core/getServer.mdx +4 -3
- package/.docs/raw/reference/core/getStorage.mdx +4 -3
- package/.docs/raw/reference/core/getTelemetry.mdx +4 -3
- package/.docs/raw/reference/core/getVector.mdx +6 -4
- package/.docs/raw/reference/core/getVectors.mdx +4 -3
- package/.docs/raw/reference/core/getWorkflow.mdx +7 -4
- package/.docs/raw/reference/core/getWorkflows.mdx +5 -3
- package/.docs/raw/reference/core/mastra-class.mdx +16 -16
- package/.docs/raw/reference/core/setLogger.mdx +6 -4
- package/.docs/raw/reference/core/setStorage.mdx +4 -4
- package/.docs/raw/reference/core/setTelemetry.mdx +4 -3
- package/.docs/raw/reference/deployer/cloudflare.mdx +11 -7
- package/.docs/raw/reference/deployer/deployer.mdx +2 -1
- package/.docs/raw/reference/deployer/netlify.mdx +4 -4
- package/.docs/raw/reference/deployer/vercel.mdx +6 -6
- package/.docs/raw/reference/evals/answer-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/bias.mdx +4 -5
- package/.docs/raw/reference/evals/completeness.mdx +4 -5
- package/.docs/raw/reference/evals/content-similarity.mdx +4 -5
- package/.docs/raw/reference/evals/context-position.mdx +4 -5
- package/.docs/raw/reference/evals/context-precision.mdx +4 -5
- package/.docs/raw/reference/evals/context-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/contextual-recall.mdx +4 -5
- package/.docs/raw/reference/evals/faithfulness.mdx +4 -5
- package/.docs/raw/reference/evals/hallucination.mdx +4 -6
- package/.docs/raw/reference/evals/keyword-coverage.mdx +4 -5
- package/.docs/raw/reference/evals/prompt-alignment.mdx +4 -6
- package/.docs/raw/reference/evals/summarization.mdx +4 -6
- package/.docs/raw/reference/evals/textual-difference.mdx +4 -5
- package/.docs/raw/reference/evals/tone-consistency.mdx +4 -7
- package/.docs/raw/reference/evals/toxicity.mdx +4 -5
- package/.docs/raw/reference/index.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/after.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/commit.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/createRun.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/else.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/events.mdx +7 -7
- package/.docs/raw/reference/legacyWorkflows/execute.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/if.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resume.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/start.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-class.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-function.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/step-options.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/suspend.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/then.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/until.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/watch.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/while.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/workflow.mdx +7 -7
- package/.docs/raw/reference/memory/createThread.mdx +10 -10
- package/.docs/raw/reference/memory/deleteMessages.mdx +7 -5
- package/.docs/raw/reference/memory/getThreadById.mdx +6 -5
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +11 -10
- package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +10 -9
- package/.docs/raw/reference/memory/{Memory.mdx → memory-class.mdx} +53 -46
- package/.docs/raw/reference/memory/query.mdx +39 -25
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +9 -5
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +26 -22
- package/.docs/raw/reference/observability/ai-tracing/exporters/arize.mdx +16 -12
- package/.docs/raw/reference/observability/ai-tracing/exporters/braintrust.mdx +14 -14
- package/.docs/raw/reference/observability/ai-tracing/exporters/cloud-exporter.mdx +16 -11
- package/.docs/raw/reference/observability/ai-tracing/exporters/console-exporter.mdx +10 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/default-exporter.mdx +23 -15
- package/.docs/raw/reference/observability/ai-tracing/exporters/langfuse.mdx +6 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +13 -13
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +44 -36
- package/.docs/raw/reference/observability/ai-tracing/interfaces.mdx +115 -106
- package/.docs/raw/reference/observability/ai-tracing/processors/sensitive-data-filter.mdx +37 -32
- package/.docs/raw/reference/observability/ai-tracing/span.mdx +29 -26
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +13 -15
- package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +2 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +12 -14
- package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +1 -1
- package/.docs/raw/reference/processors/batch-parts-processor.mdx +13 -10
- package/.docs/raw/reference/processors/language-detector.mdx +27 -17
- package/.docs/raw/reference/processors/moderation-processor.mdx +26 -17
- package/.docs/raw/reference/processors/pii-detector.mdx +28 -18
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +25 -17
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +26 -17
- package/.docs/raw/reference/processors/token-limiter-processor.mdx +22 -15
- package/.docs/raw/reference/processors/unicode-normalizer.mdx +13 -12
- package/.docs/raw/reference/rag/chunk.mdx +41 -23
- package/.docs/raw/reference/rag/database-config.mdx +56 -38
- package/.docs/raw/reference/rag/document.mdx +1 -1
- package/.docs/raw/reference/rag/embeddings.mdx +1 -1
- package/.docs/raw/reference/rag/extract-params.mdx +1 -1
- package/.docs/raw/reference/rag/graph-rag.mdx +1 -1
- package/.docs/raw/reference/rag/metadata-filters.mdx +23 -26
- package/.docs/raw/reference/rag/rerank.mdx +1 -1
- package/.docs/raw/reference/rag/rerankWithScorer.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +35 -17
- package/.docs/raw/reference/scorers/answer-similarity.mdx +84 -68
- package/.docs/raw/reference/scorers/bias.mdx +22 -19
- package/.docs/raw/reference/scorers/completeness.mdx +21 -16
- package/.docs/raw/reference/scorers/content-similarity.mdx +12 -10
- package/.docs/raw/reference/scorers/context-precision.mdx +73 -64
- package/.docs/raw/reference/scorers/context-relevance.mdx +142 -126
- package/.docs/raw/reference/scorers/create-scorer.mdx +93 -61
- package/.docs/raw/reference/scorers/faithfulness.mdx +21 -13
- package/.docs/raw/reference/scorers/hallucination.mdx +17 -12
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +32 -27
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +39 -33
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +203 -152
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +186 -132
- package/.docs/raw/reference/scorers/run-experiment.mdx +40 -31
- package/.docs/raw/reference/scorers/textual-difference.mdx +25 -26
- package/.docs/raw/reference/scorers/tone-consistency.mdx +29 -26
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +180 -176
- package/.docs/raw/reference/scorers/toxicity.mdx +35 -31
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
- package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
- package/.docs/raw/reference/storage/dynamodb.mdx +1 -1
- package/.docs/raw/reference/storage/lance.mdx +1 -1
- package/.docs/raw/reference/storage/libsql.mdx +2 -2
- package/.docs/raw/reference/storage/mongodb.mdx +4 -5
- package/.docs/raw/reference/storage/mssql.mdx +5 -4
- package/.docs/raw/reference/storage/postgresql.mdx +35 -33
- package/.docs/raw/reference/storage/upstash.mdx +6 -5
- package/.docs/raw/reference/streaming/ChunkType.mdx +788 -314
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +265 -109
- package/.docs/raw/reference/streaming/agents/stream.mdx +375 -266
- package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +233 -162
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +4 -4
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +5 -5
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +24 -20
- package/.docs/raw/reference/streaming/workflows/stream.mdx +35 -26
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +36 -27
- package/.docs/raw/reference/templates/overview.mdx +16 -39
- package/.docs/raw/reference/tools/client.mdx +1 -1
- package/.docs/raw/reference/tools/create-tool.mdx +45 -35
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +2 -2
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +12 -12
- package/.docs/raw/reference/tools/mcp-client.mdx +70 -64
- package/.docs/raw/reference/tools/mcp-server.mdx +91 -78
- package/.docs/raw/reference/tools/vector-query-tool.mdx +48 -38
- package/.docs/raw/reference/vectors/astra.mdx +1 -1
- package/.docs/raw/reference/vectors/chroma.mdx +25 -19
- package/.docs/raw/reference/vectors/couchbase.mdx +4 -4
- package/.docs/raw/reference/vectors/lance.mdx +5 -6
- package/.docs/raw/reference/vectors/libsql.mdx +1 -1
- package/.docs/raw/reference/vectors/mongodb.mdx +1 -1
- package/.docs/raw/reference/vectors/opensearch.mdx +1 -1
- package/.docs/raw/reference/vectors/pg.mdx +8 -4
- package/.docs/raw/reference/vectors/pinecone.mdx +1 -1
- package/.docs/raw/reference/vectors/qdrant.mdx +1 -1
- package/.docs/raw/reference/vectors/s3vectors.mdx +35 -27
- package/.docs/raw/reference/vectors/turbopuffer.mdx +1 -1
- package/.docs/raw/reference/vectors/upstash.mdx +33 -25
- package/.docs/raw/reference/vectors/vectorize.mdx +1 -1
- package/.docs/raw/reference/voice/azure.mdx +1 -1
- package/.docs/raw/reference/voice/cloudflare.mdx +1 -1
- package/.docs/raw/reference/voice/composite-voice.mdx +1 -1
- package/.docs/raw/reference/voice/deepgram.mdx +1 -1
- package/.docs/raw/reference/voice/elevenlabs.mdx +1 -1
- package/.docs/raw/reference/voice/google-gemini-live.mdx +6 -4
- package/.docs/raw/reference/voice/google.mdx +1 -1
- package/.docs/raw/reference/voice/mastra-voice.mdx +1 -1
- package/.docs/raw/reference/voice/murf.mdx +1 -1
- package/.docs/raw/reference/voice/openai-realtime.mdx +1 -1
- package/.docs/raw/reference/voice/openai.mdx +1 -1
- package/.docs/raw/reference/voice/playai.mdx +1 -1
- package/.docs/raw/reference/voice/sarvam.mdx +1 -1
- package/.docs/raw/reference/voice/speechify.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addInstructions.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
- package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
- package/.docs/raw/reference/voice/voice.close.mdx +1 -1
- package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
- package/.docs/raw/reference/voice/voice.events.mdx +1 -1
- package/.docs/raw/reference/voice/voice.getSpeakers.mdx +23 -30
- package/.docs/raw/reference/voice/voice.listen.mdx +1 -1
- package/.docs/raw/reference/voice/voice.off.mdx +1 -1
- package/.docs/raw/reference/voice/voice.on.mdx +1 -1
- package/.docs/raw/reference/voice/voice.send.mdx +1 -1
- package/.docs/raw/reference/voice/voice.speak.mdx +1 -1
- package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
- package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -3
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +49 -34
- package/.docs/raw/reference/workflows/run-methods/start.mdx +43 -31
- package/.docs/raw/reference/workflows/run-methods/watch.mdx +7 -8
- package/.docs/raw/reference/workflows/run.mdx +7 -10
- package/.docs/raw/reference/workflows/step.mdx +15 -12
- package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -7
- package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/map.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +5 -5
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +3 -3
- package/.docs/raw/reference/workflows/workflow.mdx +13 -10
- package/.docs/raw/scorers/custom-scorers.mdx +58 -48
- package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +6 -8
- package/.docs/raw/scorers/evals-old-api/overview.mdx +8 -8
- package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +9 -9
- package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +5 -5
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +3 -1
- package/.docs/raw/scorers/overview.mdx +20 -19
- package/.docs/raw/server-db/custom-api-routes.mdx +8 -8
- package/.docs/raw/server-db/mastra-client.mdx +56 -54
- package/.docs/raw/server-db/middleware.mdx +11 -7
- package/.docs/raw/server-db/production-server.mdx +5 -7
- package/.docs/raw/server-db/runtime-context.mdx +41 -38
- package/.docs/raw/server-db/storage.mdx +82 -87
- package/.docs/raw/streaming/events.mdx +24 -16
- package/.docs/raw/streaming/overview.mdx +17 -18
- package/.docs/raw/streaming/tool-streaming.mdx +9 -10
- package/.docs/raw/streaming/workflow-streaming.mdx +14 -12
- package/.docs/raw/tools-mcp/advanced-usage.mdx +2 -2
- package/.docs/raw/tools-mcp/mcp-overview.mdx +92 -102
- package/.docs/raw/tools-mcp/overview.mdx +9 -14
- package/.docs/raw/voice/overview.mdx +273 -250
- package/.docs/raw/voice/speech-to-speech.mdx +14 -12
- package/.docs/raw/voice/speech-to-text.mdx +2 -2
- package/.docs/raw/voice/text-to-speech.mdx +2 -2
- package/.docs/raw/workflows/agents-and-tools.mdx +29 -28
- package/.docs/raw/workflows/control-flow.mdx +24 -24
- package/.docs/raw/workflows/error-handling.mdx +15 -17
- package/.docs/raw/workflows/human-in-the-loop.mdx +39 -39
- package/.docs/raw/workflows/inngest-workflow.mdx +33 -29
- package/.docs/raw/workflows/input-data-mapping.mdx +9 -9
- package/.docs/raw/workflows/overview.mdx +60 -60
- package/.docs/raw/workflows/snapshots.mdx +54 -36
- package/.docs/raw/workflows/suspend-and-resume.mdx +52 -57
- package/.docs/raw/workflows-legacy/control-flow.mdx +15 -17
- package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +3 -1
- package/.docs/raw/workflows-legacy/error-handling.mdx +8 -6
- package/.docs/raw/workflows-legacy/nested-workflows.mdx +6 -0
- package/.docs/raw/workflows-legacy/overview.mdx +28 -26
- package/.docs/raw/workflows-legacy/runtime-variables.mdx +4 -2
- package/.docs/raw/workflows-legacy/steps.mdx +5 -3
- package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +10 -8
- package/.docs/raw/workflows-legacy/variables.mdx +10 -8
- package/CHANGELOG.md +14 -0
- package/package.json +5 -5
- package/.docs/raw/memory/storage/memory-with-mongodb.mdx +0 -148
|
@@ -3,7 +3,7 @@ title: "Reference: Prompt Alignment Scorer | Scorers | Mastra Docs"
|
|
|
3
3
|
description: Documentation for the Prompt Alignment Scorer in Mastra. Evaluates how well agent responses align with user prompt intent, requirements, completeness, and appropriateness using multi-dimensional analysis.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
import
|
|
6
|
+
import PropertiesTable from "@site/src/components/PropertiesTable";
|
|
7
7
|
|
|
8
8
|
# Prompt Alignment Scorer
|
|
9
9
|
|
|
@@ -16,7 +16,8 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
16
16
|
{
|
|
17
17
|
name: "model",
|
|
18
18
|
type: "MastraModelConfig",
|
|
19
|
-
description:
|
|
19
|
+
description:
|
|
20
|
+
"The language model to use for evaluating prompt-response alignment",
|
|
20
21
|
required: true,
|
|
21
22
|
},
|
|
22
23
|
{
|
|
@@ -34,7 +35,8 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
34
35
|
{
|
|
35
36
|
name: "evaluationMode",
|
|
36
37
|
type: "'user' | 'system' | 'both'",
|
|
37
|
-
description:
|
|
38
|
+
description:
|
|
39
|
+
"Evaluation mode - 'user' evaluates user prompt alignment only, 'system' evaluates system compliance only, 'both' evaluates both with weighted scoring (default: 'both')",
|
|
38
40
|
required: false,
|
|
39
41
|
},
|
|
40
42
|
],
|
|
@@ -49,12 +51,14 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
49
51
|
{
|
|
50
52
|
name: "score",
|
|
51
53
|
type: "number",
|
|
52
|
-
description:
|
|
54
|
+
description:
|
|
55
|
+
"Multi-dimensional alignment score between 0 and scale (default 0-1)",
|
|
53
56
|
},
|
|
54
57
|
{
|
|
55
58
|
name: "reason",
|
|
56
59
|
type: "string",
|
|
57
|
-
description:
|
|
60
|
+
description:
|
|
61
|
+
"Human-readable explanation of the prompt alignment evaluation with detailed breakdown",
|
|
58
62
|
},
|
|
59
63
|
]}
|
|
60
64
|
/>
|
|
@@ -104,12 +108,12 @@ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates
|
|
|
104
108
|
You can customize the Prompt Alignment Scorer by adjusting the scale parameter and evaluation mode to fit your scoring needs.
|
|
105
109
|
|
|
106
110
|
```typescript showLineNumbers copy
|
|
107
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
108
|
-
model:
|
|
109
|
-
options: {
|
|
111
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
112
|
+
model: "openai/gpt-4o-mini",
|
|
113
|
+
options: {
|
|
110
114
|
scale: 10, // Score from 0-10 instead of 0-1
|
|
111
|
-
evaluationMode:
|
|
112
|
-
}
|
|
115
|
+
evaluationMode: "both", // 'user', 'system', or 'both' (default)
|
|
116
|
+
},
|
|
113
117
|
});
|
|
114
118
|
```
|
|
115
119
|
|
|
@@ -118,14 +122,16 @@ const scorer = createPromptAlignmentScorerLLM({
|
|
|
118
122
|
Prompt Alignment evaluates responses across four key dimensions with weighted scoring that adapts based on the evaluation mode:
|
|
119
123
|
|
|
120
124
|
#### User Mode ('user')
|
|
125
|
+
|
|
121
126
|
Evaluates alignment with user prompts only:
|
|
122
127
|
|
|
123
128
|
1. **Intent Alignment** (40% weight) - Whether the response addresses the user's core request
|
|
124
129
|
2. **Requirements Fulfillment** (30% weight) - If all user requirements are met
|
|
125
|
-
3. **Completeness** (20% weight) - Whether the response is comprehensive for user needs
|
|
130
|
+
3. **Completeness** (20% weight) - Whether the response is comprehensive for user needs
|
|
126
131
|
4. **Response Appropriateness** (10% weight) - If format and tone match user expectations
|
|
127
132
|
|
|
128
133
|
#### System Mode ('system')
|
|
134
|
+
|
|
129
135
|
Evaluates compliance with system guidelines only:
|
|
130
136
|
|
|
131
137
|
1. **Intent Alignment** (35% weight) - Whether the response follows system behavioral guidelines
|
|
@@ -134,6 +140,7 @@ Evaluates compliance with system guidelines only:
|
|
|
134
140
|
4. **Response Appropriateness** (15% weight) - If format and tone match system specifications
|
|
135
141
|
|
|
136
142
|
#### Both Mode ('both' - default)
|
|
143
|
+
|
|
137
144
|
Combines evaluation of both user and system alignment:
|
|
138
145
|
|
|
139
146
|
- **User alignment**: 70% of final score (using user mode weights)
|
|
@@ -143,28 +150,32 @@ Combines evaluation of both user and system alignment:
|
|
|
143
150
|
### Scoring Formula
|
|
144
151
|
|
|
145
152
|
**User Mode:**
|
|
153
|
+
|
|
146
154
|
```
|
|
147
|
-
Weighted Score = (intent_score × 0.4) + (requirements_score × 0.3) +
|
|
155
|
+
Weighted Score = (intent_score × 0.4) + (requirements_score × 0.3) +
|
|
148
156
|
(completeness_score × 0.2) + (appropriateness_score × 0.1)
|
|
149
157
|
Final Score = Weighted Score × scale
|
|
150
158
|
```
|
|
151
159
|
|
|
152
160
|
**System Mode:**
|
|
161
|
+
|
|
153
162
|
```
|
|
154
|
-
Weighted Score = (intent_score × 0.35) + (requirements_score × 0.35) +
|
|
163
|
+
Weighted Score = (intent_score × 0.35) + (requirements_score × 0.35) +
|
|
155
164
|
(completeness_score × 0.15) + (appropriateness_score × 0.15)
|
|
156
165
|
Final Score = Weighted Score × scale
|
|
157
166
|
```
|
|
158
167
|
|
|
159
168
|
**Both Mode (default):**
|
|
169
|
+
|
|
160
170
|
```
|
|
161
171
|
User Score = (user dimensions with user weights)
|
|
162
|
-
System Score = (system dimensions with system weights)
|
|
172
|
+
System Score = (system dimensions with system weights)
|
|
163
173
|
Weighted Score = (User Score × 0.7) + (System Score × 0.3)
|
|
164
174
|
Final Score = Weighted Score × scale
|
|
165
175
|
```
|
|
166
176
|
|
|
167
177
|
**Weight Distribution Rationale**:
|
|
178
|
+
|
|
168
179
|
- **User Mode**: Prioritizes intent (40%) and requirements (30%) for user satisfaction
|
|
169
180
|
- **System Mode**: Balances behavioral compliance (35%) and constraints (35%) equally
|
|
170
181
|
- **Both Mode**: 70/30 split ensures user needs are primary while maintaining system compliance
|
|
@@ -181,18 +192,21 @@ Final Score = Weighted Score × scale
|
|
|
181
192
|
### When to Use Each Mode
|
|
182
193
|
|
|
183
194
|
**User Mode (`'user'`)** - Use when:
|
|
195
|
+
|
|
184
196
|
- Evaluating customer service responses for user satisfaction
|
|
185
|
-
- Testing content generation quality from user perspective
|
|
197
|
+
- Testing content generation quality from user perspective
|
|
186
198
|
- Measuring how well responses address user questions
|
|
187
199
|
- Focusing purely on request fulfillment without system constraints
|
|
188
200
|
|
|
189
201
|
**System Mode (`'system'`)** - Use when:
|
|
202
|
+
|
|
190
203
|
- Auditing AI safety and compliance with behavioral guidelines
|
|
191
204
|
- Ensuring agents follow brand voice and tone requirements
|
|
192
205
|
- Validating adherence to content policies and constraints
|
|
193
206
|
- Testing system-level behavioral consistency
|
|
194
207
|
|
|
195
208
|
**Both Mode (`'both'`)** - Use when (default, recommended):
|
|
209
|
+
|
|
196
210
|
- Comprehensive evaluation of overall AI agent performance
|
|
197
211
|
- Balancing user satisfaction with system compliance
|
|
198
212
|
- Production monitoring where both user and system requirements matter
|
|
@@ -201,21 +215,26 @@ Final Score = Weighted Score × scale
|
|
|
201
215
|
## Common Use Cases
|
|
202
216
|
|
|
203
217
|
### Code Generation Evaluation
|
|
218
|
+
|
|
204
219
|
Ideal for evaluating:
|
|
220
|
+
|
|
205
221
|
- Programming task completion
|
|
206
|
-
- Code quality and completeness
|
|
222
|
+
- Code quality and completeness
|
|
207
223
|
- Adherence to coding requirements
|
|
208
224
|
- Format specifications (functions, classes, etc.)
|
|
209
225
|
|
|
210
226
|
```typescript
|
|
211
227
|
// Example: API endpoint creation
|
|
212
|
-
const codePrompt =
|
|
213
|
-
|
|
228
|
+
const codePrompt =
|
|
229
|
+
"Create a REST API endpoint with authentication and rate limiting";
|
|
230
|
+
// Scorer evaluates: intent (API creation), requirements (auth + rate limiting),
|
|
214
231
|
// completeness (full implementation), format (code structure)
|
|
215
232
|
```
|
|
216
233
|
|
|
217
234
|
### Instruction Following Assessment
|
|
235
|
+
|
|
218
236
|
Perfect for:
|
|
237
|
+
|
|
219
238
|
- Task completion verification
|
|
220
239
|
- Multi-step instruction adherence
|
|
221
240
|
- Requirement compliance checking
|
|
@@ -223,12 +242,15 @@ Perfect for:
|
|
|
223
242
|
|
|
224
243
|
```typescript
|
|
225
244
|
// Example: Multi-requirement task
|
|
226
|
-
const taskPrompt =
|
|
245
|
+
const taskPrompt =
|
|
246
|
+
"Write a Python class with initialization, validation, error handling, and documentation";
|
|
227
247
|
// Scorer tracks each requirement individually and provides detailed breakdown
|
|
228
248
|
```
|
|
229
249
|
|
|
230
250
|
### Content Format Validation
|
|
251
|
+
|
|
231
252
|
Useful for:
|
|
253
|
+
|
|
232
254
|
- Format specification compliance
|
|
233
255
|
- Style guide adherence
|
|
234
256
|
- Output structure verification
|
|
@@ -236,49 +258,53 @@ Useful for:
|
|
|
236
258
|
|
|
237
259
|
```typescript
|
|
238
260
|
// Example: Structured output
|
|
239
|
-
const formatPrompt =
|
|
261
|
+
const formatPrompt =
|
|
262
|
+
"Explain the differences between let and const in JavaScript using bullet points";
|
|
240
263
|
// Scorer evaluates content accuracy AND format compliance
|
|
241
264
|
```
|
|
242
265
|
|
|
243
266
|
### Agent Response Quality
|
|
267
|
+
|
|
244
268
|
Measure how well your AI agents follow user instructions:
|
|
245
269
|
|
|
246
270
|
```typescript
|
|
247
271
|
const agent = new Agent({
|
|
248
|
-
name:
|
|
249
|
-
instructions:
|
|
250
|
-
|
|
272
|
+
name: "CodingAssistant",
|
|
273
|
+
instructions:
|
|
274
|
+
"You are a helpful coding assistant. Always provide working code examples.",
|
|
275
|
+
model: "openai/gpt-4o",
|
|
251
276
|
});
|
|
252
277
|
|
|
253
278
|
// Evaluate comprehensive alignment (default)
|
|
254
279
|
const scorer = createPromptAlignmentScorerLLM({
|
|
255
|
-
model:
|
|
256
|
-
options: { evaluationMode:
|
|
280
|
+
model: "openai/gpt-4o-mini",
|
|
281
|
+
options: { evaluationMode: "both" }, // Evaluates both user intent and system guidelines
|
|
257
282
|
});
|
|
258
283
|
|
|
259
284
|
// Evaluate just user satisfaction
|
|
260
285
|
const userScorer = createPromptAlignmentScorerLLM({
|
|
261
|
-
model:
|
|
262
|
-
options: { evaluationMode:
|
|
286
|
+
model: "openai/gpt-4o-mini",
|
|
287
|
+
options: { evaluationMode: "user" }, // Focus only on user request fulfillment
|
|
263
288
|
});
|
|
264
289
|
|
|
265
290
|
// Evaluate system compliance
|
|
266
291
|
const systemScorer = createPromptAlignmentScorerLLM({
|
|
267
|
-
model:
|
|
268
|
-
options: { evaluationMode:
|
|
292
|
+
model: "openai/gpt-4o-mini",
|
|
293
|
+
options: { evaluationMode: "system" }, // Check adherence to system instructions
|
|
269
294
|
});
|
|
270
295
|
|
|
271
296
|
const result = await scorer.run(agentRun);
|
|
272
297
|
```
|
|
273
298
|
|
|
274
299
|
### Prompt Engineering Optimization
|
|
300
|
+
|
|
275
301
|
Test different prompts to improve alignment:
|
|
276
302
|
|
|
277
303
|
```typescript
|
|
278
304
|
const prompts = [
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
305
|
+
"Write a function to calculate factorial",
|
|
306
|
+
"Create a Python function that calculates factorial with error handling for negative inputs",
|
|
307
|
+
"Implement a factorial calculator in Python with: input validation, error handling, and docstring",
|
|
282
308
|
];
|
|
283
309
|
|
|
284
310
|
// Compare alignment scores to find the best prompt
|
|
@@ -289,6 +315,7 @@ for (const prompt of prompts) {
|
|
|
289
315
|
```
|
|
290
316
|
|
|
291
317
|
### Multi-Agent System Evaluation
|
|
318
|
+
|
|
292
319
|
Compare different agents or models:
|
|
293
320
|
|
|
294
321
|
```typescript
|
|
@@ -311,27 +338,30 @@ for (const agent of agents) {
|
|
|
311
338
|
### Basic Configuration
|
|
312
339
|
|
|
313
340
|
```typescript
|
|
314
|
-
import { createPromptAlignmentScorerLLM } from
|
|
341
|
+
import { createPromptAlignmentScorerLLM } from "@mastra/evals";
|
|
315
342
|
|
|
316
343
|
const scorer = createPromptAlignmentScorerLLM({
|
|
317
|
-
model:
|
|
344
|
+
model: "openai/gpt-4o",
|
|
318
345
|
});
|
|
319
346
|
|
|
320
347
|
// Evaluate a code generation task
|
|
321
348
|
const result = await scorer.run({
|
|
322
|
-
input: [
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
349
|
+
input: [
|
|
350
|
+
{
|
|
351
|
+
role: "user",
|
|
352
|
+
content:
|
|
353
|
+
"Write a Python function to calculate factorial with error handling",
|
|
354
|
+
},
|
|
355
|
+
],
|
|
326
356
|
output: {
|
|
327
|
-
role:
|
|
357
|
+
role: "assistant",
|
|
328
358
|
text: `def factorial(n):
|
|
329
359
|
if n < 0:
|
|
330
360
|
raise ValueError("Factorial not defined for negative numbers")
|
|
331
361
|
if n == 0:
|
|
332
362
|
return 1
|
|
333
|
-
return n * factorial(n-1)
|
|
334
|
-
}
|
|
363
|
+
return n * factorial(n-1)`,
|
|
364
|
+
},
|
|
335
365
|
});
|
|
336
366
|
// Result: { score: 0.95, reason: "Excellent alignment - function addresses intent, includes error handling..." }
|
|
337
367
|
```
|
|
@@ -341,23 +371,23 @@ const result = await scorer.run({
|
|
|
341
371
|
```typescript
|
|
342
372
|
// Configure scale and evaluation mode
|
|
343
373
|
const scorer = createPromptAlignmentScorerLLM({
|
|
344
|
-
model:
|
|
374
|
+
model: "openai/gpt-4o",
|
|
345
375
|
options: {
|
|
346
376
|
scale: 10, // Score from 0-10 instead of 0-1
|
|
347
|
-
evaluationMode:
|
|
377
|
+
evaluationMode: "both", // 'user', 'system', or 'both' (default)
|
|
348
378
|
},
|
|
349
379
|
});
|
|
350
380
|
|
|
351
381
|
// User-only evaluation - focus on user satisfaction
|
|
352
382
|
const userScorer = createPromptAlignmentScorerLLM({
|
|
353
|
-
model:
|
|
354
|
-
options: { evaluationMode:
|
|
383
|
+
model: "openai/gpt-4o",
|
|
384
|
+
options: { evaluationMode: "user" },
|
|
355
385
|
});
|
|
356
386
|
|
|
357
387
|
// System-only evaluation - focus on compliance
|
|
358
388
|
const systemScorer = createPromptAlignmentScorerLLM({
|
|
359
|
-
model:
|
|
360
|
-
options: { evaluationMode:
|
|
389
|
+
model: "openai/gpt-4o",
|
|
390
|
+
options: { evaluationMode: "system" },
|
|
361
391
|
});
|
|
362
392
|
|
|
363
393
|
const result = await scorer.run(testRun);
|
|
@@ -369,14 +399,16 @@ const result = await scorer.run(testRun);
|
|
|
369
399
|
```typescript
|
|
370
400
|
// Evaluate bullet point formatting
|
|
371
401
|
const result = await scorer.run({
|
|
372
|
-
input: [
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
402
|
+
input: [
|
|
403
|
+
{
|
|
404
|
+
role: "user",
|
|
405
|
+
content: "List the benefits of TypeScript in bullet points",
|
|
406
|
+
},
|
|
407
|
+
],
|
|
376
408
|
output: {
|
|
377
|
-
role:
|
|
378
|
-
text:
|
|
379
|
-
}
|
|
409
|
+
role: "assistant",
|
|
410
|
+
text: "TypeScript provides static typing, better IDE support, and enhanced code reliability.",
|
|
411
|
+
},
|
|
380
412
|
});
|
|
381
413
|
// Result: Lower appropriateness score due to format mismatch (paragraph vs bullet points)
|
|
382
414
|
```
|
|
@@ -385,26 +417,29 @@ const result = await scorer.run({
|
|
|
385
417
|
|
|
386
418
|
In this example, the response fully addresses the user's prompt with all requirements met.
|
|
387
419
|
|
|
388
|
-
```typescript
|
|
420
|
+
```typescript title="src/example-excellent-prompt-alignment.ts" showLineNumbers copy
|
|
389
421
|
import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/llm";
|
|
390
422
|
|
|
391
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
392
|
-
model:
|
|
423
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
424
|
+
model: "openai/gpt-4o-mini",
|
|
393
425
|
});
|
|
394
426
|
|
|
395
|
-
const inputMessages = [
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
427
|
+
const inputMessages = [
|
|
428
|
+
{
|
|
429
|
+
role: "user",
|
|
430
|
+
content:
|
|
431
|
+
"Write a Python function to calculate factorial with error handling for negative numbers",
|
|
432
|
+
},
|
|
433
|
+
];
|
|
399
434
|
|
|
400
|
-
const outputMessage = {
|
|
435
|
+
const outputMessage = {
|
|
401
436
|
text: `def factorial(n):
|
|
402
437
|
"""Calculate factorial of a number."""
|
|
403
438
|
if n < 0:
|
|
404
439
|
raise ValueError("Factorial not defined for negative numbers")
|
|
405
440
|
if n == 0 or n == 1:
|
|
406
441
|
return 1
|
|
407
|
-
return n * factorial(n - 1)
|
|
442
|
+
return n * factorial(n - 1)`,
|
|
408
443
|
};
|
|
409
444
|
|
|
410
445
|
const result = await scorer.run({
|
|
@@ -430,20 +465,22 @@ The output receives a high score because it perfectly addresses the intent, fulf
|
|
|
430
465
|
|
|
431
466
|
In this example, the response addresses the core intent but misses some requirements or has format issues.
|
|
432
467
|
|
|
433
|
-
```typescript
|
|
468
|
+
```typescript title="src/example-partial-prompt-alignment.ts" showLineNumbers copy
|
|
434
469
|
import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/llm";
|
|
435
470
|
|
|
436
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
437
|
-
model:
|
|
471
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
472
|
+
model: "openai/gpt-4o-mini",
|
|
438
473
|
});
|
|
439
474
|
|
|
440
|
-
const inputMessages = [
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
475
|
+
const inputMessages = [
|
|
476
|
+
{
|
|
477
|
+
role: "user",
|
|
478
|
+
content: "List the benefits of TypeScript in bullet points",
|
|
479
|
+
},
|
|
480
|
+
];
|
|
444
481
|
|
|
445
|
-
const outputMessage = {
|
|
446
|
-
text: "TypeScript provides static typing, better IDE support, and enhanced code reliability through compile-time error checking."
|
|
482
|
+
const outputMessage = {
|
|
483
|
+
text: "TypeScript provides static typing, better IDE support, and enhanced code reliability through compile-time error checking.",
|
|
447
484
|
};
|
|
448
485
|
|
|
449
486
|
const result = await scorer.run({
|
|
@@ -469,22 +506,25 @@ The output receives a lower score because while the content is accurate, it does
|
|
|
469
506
|
|
|
470
507
|
In this example, the response fails to address the user's specific requirements.
|
|
471
508
|
|
|
472
|
-
```typescript
|
|
509
|
+
```typescript title="src/example-poor-prompt-alignment.ts" showLineNumbers copy
|
|
473
510
|
import { createPromptAlignmentScorerLLM } from "@mastra/evals/scorers/llm";
|
|
474
511
|
|
|
475
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
476
|
-
model:
|
|
512
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
513
|
+
model: "openai/gpt-4o-mini",
|
|
477
514
|
});
|
|
478
515
|
|
|
479
|
-
const inputMessages = [
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
516
|
+
const inputMessages = [
|
|
517
|
+
{
|
|
518
|
+
role: "user",
|
|
519
|
+
content:
|
|
520
|
+
"Write a Python class with initialization, validation, error handling, and documentation",
|
|
521
|
+
},
|
|
522
|
+
];
|
|
483
523
|
|
|
484
|
-
const outputMessage = {
|
|
524
|
+
const outputMessage = {
|
|
485
525
|
text: `class Example:
|
|
486
526
|
def __init__(self, value):
|
|
487
|
-
self.value = value
|
|
527
|
+
self.value = value`,
|
|
488
528
|
};
|
|
489
529
|
|
|
490
530
|
const result = await scorer.run({
|
|
@@ -512,26 +552,30 @@ The output receives a low score because it only partially fulfills the requireme
|
|
|
512
552
|
|
|
513
553
|
Evaluates how well the response addresses the user's request, ignoring system instructions:
|
|
514
554
|
|
|
515
|
-
```typescript
|
|
516
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
517
|
-
model:
|
|
518
|
-
options: { evaluationMode:
|
|
555
|
+
```typescript title="src/example-user-mode.ts" showLineNumbers copy
|
|
556
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
557
|
+
model: "openai/gpt-4o-mini",
|
|
558
|
+
options: { evaluationMode: "user" },
|
|
519
559
|
});
|
|
520
560
|
|
|
521
561
|
const result = await scorer.run({
|
|
522
562
|
input: {
|
|
523
|
-
inputMessages: [
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
563
|
+
inputMessages: [
|
|
564
|
+
{
|
|
565
|
+
role: "user",
|
|
566
|
+
content: "Explain recursion with an example",
|
|
567
|
+
},
|
|
568
|
+
],
|
|
569
|
+
systemMessages: [
|
|
570
|
+
{
|
|
571
|
+
role: "system",
|
|
572
|
+
content: "Always provide code examples in Python",
|
|
573
|
+
},
|
|
574
|
+
],
|
|
575
|
+
},
|
|
576
|
+
output: {
|
|
577
|
+
text: "Recursion is when a function calls itself. For example: factorial(5) = 5 * factorial(4)",
|
|
531
578
|
},
|
|
532
|
-
output: {
|
|
533
|
-
text: "Recursion is when a function calls itself. For example: factorial(5) = 5 * factorial(4)"
|
|
534
|
-
}
|
|
535
579
|
});
|
|
536
580
|
// Scores high for addressing user request, even without Python code
|
|
537
581
|
```
|
|
@@ -540,26 +584,31 @@ const result = await scorer.run({
|
|
|
540
584
|
|
|
541
585
|
Evaluates compliance with system behavioral guidelines and constraints:
|
|
542
586
|
|
|
543
|
-
```typescript
|
|
544
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
545
|
-
model:
|
|
546
|
-
options: { evaluationMode:
|
|
587
|
+
```typescript title="src/example-system-mode.ts" showLineNumbers copy
|
|
588
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
589
|
+
model: "openai/gpt-4o-mini",
|
|
590
|
+
options: { evaluationMode: "system" },
|
|
547
591
|
});
|
|
548
592
|
|
|
549
593
|
const result = await scorer.run({
|
|
550
594
|
input: {
|
|
551
|
-
systemMessages: [
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
595
|
+
systemMessages: [
|
|
596
|
+
{
|
|
597
|
+
role: "system",
|
|
598
|
+
content:
|
|
599
|
+
"You are a helpful assistant. Always be polite, concise, and provide examples.",
|
|
600
|
+
},
|
|
601
|
+
],
|
|
602
|
+
inputMessages: [
|
|
603
|
+
{
|
|
604
|
+
role: "user",
|
|
605
|
+
content: "What is machine learning?",
|
|
606
|
+
},
|
|
607
|
+
],
|
|
608
|
+
},
|
|
609
|
+
output: {
|
|
610
|
+
text: "Machine learning is a subset of AI where computers learn from data. For example, spam filters learn to identify unwanted emails by analyzing patterns in previously marked spam.",
|
|
559
611
|
},
|
|
560
|
-
output: {
|
|
561
|
-
text: "Machine learning is a subset of AI where computers learn from data. For example, spam filters learn to identify unwanted emails by analyzing patterns in previously marked spam."
|
|
562
|
-
}
|
|
563
612
|
});
|
|
564
613
|
// Evaluates politeness, conciseness, and example provision
|
|
565
614
|
```
|
|
@@ -568,47 +617,52 @@ const result = await scorer.run({
|
|
|
568
617
|
|
|
569
618
|
Evaluates both user intent fulfillment and system compliance with weighted scoring (70% user, 30% system):
|
|
570
619
|
|
|
571
|
-
```typescript
|
|
572
|
-
const scorer = createPromptAlignmentScorerLLM({
|
|
573
|
-
model:
|
|
574
|
-
options: { evaluationMode:
|
|
620
|
+
```typescript title="src/example-both-mode.ts" showLineNumbers copy
|
|
621
|
+
const scorer = createPromptAlignmentScorerLLM({
|
|
622
|
+
model: "openai/gpt-4o-mini",
|
|
623
|
+
options: { evaluationMode: "both" }, // This is the default
|
|
575
624
|
});
|
|
576
625
|
|
|
577
626
|
const result = await scorer.run({
|
|
578
627
|
input: {
|
|
579
|
-
systemMessages: [
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
628
|
+
systemMessages: [
|
|
629
|
+
{
|
|
630
|
+
role: "system",
|
|
631
|
+
content:
|
|
632
|
+
"Always provide code examples when explaining programming concepts",
|
|
633
|
+
},
|
|
634
|
+
],
|
|
635
|
+
inputMessages: [
|
|
636
|
+
{
|
|
637
|
+
role: "user",
|
|
638
|
+
content: "Explain how to reverse a string",
|
|
639
|
+
},
|
|
640
|
+
],
|
|
587
641
|
},
|
|
588
|
-
output: {
|
|
642
|
+
output: {
|
|
589
643
|
text: `To reverse a string, you can iterate through it backwards. Here's an example in Python:
|
|
590
644
|
|
|
591
645
|
def reverse_string(s):
|
|
592
646
|
return s[::-1]
|
|
593
647
|
|
|
594
|
-
# Usage: reverse_string("hello") returns "olleh"
|
|
595
|
-
}
|
|
648
|
+
# Usage: reverse_string("hello") returns "olleh"`,
|
|
649
|
+
},
|
|
596
650
|
});
|
|
597
651
|
// High score for both addressing the user's request AND following system guidelines
|
|
598
652
|
```
|
|
599
653
|
|
|
600
654
|
## Comparison with Other Scorers
|
|
601
655
|
|
|
602
|
-
| Aspect
|
|
603
|
-
|
|
604
|
-
| **Focus**
|
|
656
|
+
| Aspect | Prompt Alignment | Answer Relevancy | Faithfulness |
|
|
657
|
+
| -------------- | ------------------------------------------ | ---------------------------- | -------------------------------- |
|
|
658
|
+
| **Focus** | Multi-dimensional prompt adherence | Query-response relevance | Context groundedness |
|
|
605
659
|
| **Evaluation** | Intent, requirements, completeness, format | Semantic similarity to query | Factual consistency with context |
|
|
606
|
-
| **Use Case**
|
|
607
|
-
| **Dimensions** | 4 weighted dimensions
|
|
660
|
+
| **Use Case** | General prompt following | Information retrieval | RAG/context-based systems |
|
|
661
|
+
| **Dimensions** | 4 weighted dimensions | Single relevance dimension | Single faithfulness dimension |
|
|
608
662
|
|
|
609
663
|
## Related
|
|
610
664
|
|
|
611
665
|
- [Answer Relevancy Scorer](/reference/scorers/answer-relevancy) - Evaluates query-response relevance
|
|
612
666
|
- [Faithfulness Scorer](/reference/scorers/faithfulness) - Measures context groundedness
|
|
613
667
|
- [Tool Call Accuracy Scorer](/reference/scorers/tool-call-accuracy) - Evaluates tool selection
|
|
614
|
-
- [Custom Scorers](/docs/scorers/custom-scorers) - Creating your own evaluation metrics
|
|
668
|
+
- [Custom Scorers](/docs/scorers/custom-scorers) - Creating your own evaluation metrics
|