@mastra/mcp-docs-server 0.13.37 → 0.13.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fchangeset-cli.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fexternal-types.md +2 -0
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +8 -8
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +69 -69
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Freact.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +23 -23
- package/.docs/organized/changelogs/create-mastra.md +5 -5
- package/.docs/organized/changelogs/mastra.md +17 -17
- package/.docs/raw/agents/adding-voice.mdx +20 -9
- package/.docs/raw/agents/agent-memory.mdx +55 -39
- package/.docs/raw/agents/guardrails.mdx +68 -61
- package/.docs/raw/agents/networks.mdx +50 -46
- package/.docs/raw/agents/overview.mdx +125 -88
- package/.docs/raw/agents/using-tools.mdx +14 -15
- package/.docs/raw/auth/auth0.mdx +28 -27
- package/.docs/raw/auth/clerk.mdx +22 -20
- package/.docs/raw/auth/firebase.mdx +42 -39
- package/.docs/raw/auth/index.mdx +1 -1
- package/.docs/raw/auth/jwt.mdx +18 -16
- package/.docs/raw/auth/supabase.mdx +20 -18
- package/.docs/raw/auth/workos.mdx +32 -26
- package/.docs/raw/community/contributing-templates.mdx +7 -7
- package/.docs/raw/community/discord.mdx +2 -2
- package/.docs/raw/community/licensing.mdx +1 -1
- package/.docs/raw/course/03-agent-memory/26-updating-mastra-export-comprehensive.md +0 -32
- package/.docs/raw/deployment/cloud-providers/amazon-ec2.mdx +41 -22
- package/.docs/raw/deployment/cloud-providers/aws-lambda.mdx +48 -29
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +52 -24
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +86 -55
- package/.docs/raw/deployment/cloud-providers/index.mdx +16 -13
- package/.docs/raw/deployment/monorepo.mdx +32 -42
- package/.docs/raw/deployment/overview.mdx +15 -15
- package/.docs/raw/deployment/server-deployment.mdx +18 -23
- package/.docs/raw/deployment/serverless-platforms/cloudflare-deployer.mdx +16 -17
- package/.docs/raw/deployment/serverless-platforms/index.mdx +15 -12
- package/.docs/raw/deployment/serverless-platforms/netlify-deployer.mdx +14 -23
- package/.docs/raw/deployment/serverless-platforms/vercel-deployer.mdx +16 -23
- package/.docs/raw/deployment/web-framework.mdx +14 -14
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +138 -145
- package/.docs/raw/frameworks/agentic-uis/assistant-ui.mdx +54 -43
- package/.docs/raw/frameworks/agentic-uis/cedar-os.mdx +51 -36
- package/.docs/raw/frameworks/agentic-uis/copilotkit.mdx +161 -120
- package/.docs/raw/frameworks/agentic-uis/openrouter.mdx +59 -48
- package/.docs/raw/frameworks/servers/express.mdx +45 -44
- package/.docs/raw/frameworks/web-frameworks/astro.mdx +245 -162
- package/.docs/raw/frameworks/web-frameworks/next-js.mdx +112 -69
- package/.docs/raw/frameworks/web-frameworks/sveltekit.mdx +189 -164
- package/.docs/raw/frameworks/web-frameworks/vite-react.mdx +94 -81
- package/.docs/raw/getting-started/installation.mdx +164 -133
- package/.docs/raw/getting-started/mcp-docs-server.mdx +36 -39
- package/.docs/raw/getting-started/project-structure.mdx +34 -42
- package/.docs/raw/getting-started/studio.mdx +40 -58
- package/.docs/raw/getting-started/templates.mdx +22 -27
- package/.docs/raw/index.mdx +9 -9
- package/.docs/raw/mastra-cloud/dashboard.mdx +10 -10
- package/.docs/raw/mastra-cloud/observability.mdx +7 -8
- package/.docs/raw/mastra-cloud/overview.mdx +16 -22
- package/.docs/raw/mastra-cloud/setting-up.mdx +33 -20
- package/.docs/raw/memory/conversation-history.mdx +2 -2
- package/.docs/raw/memory/overview.mdx +21 -23
- package/.docs/raw/memory/semantic-recall.mdx +14 -13
- package/.docs/raw/memory/storage/memory-with-libsql.mdx +27 -28
- package/.docs/raw/memory/storage/memory-with-pg.mdx +26 -26
- package/.docs/raw/memory/storage/memory-with-upstash.mdx +26 -27
- package/.docs/raw/memory/threads-and-resources.mdx +23 -20
- package/.docs/raw/memory/working-memory.mdx +27 -38
- package/.docs/raw/observability/ai-tracing/exporters/arize.mdx +30 -29
- package/.docs/raw/observability/ai-tracing/exporters/braintrust.mdx +8 -9
- package/.docs/raw/observability/ai-tracing/exporters/cloud.mdx +17 -16
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +31 -32
- package/.docs/raw/observability/ai-tracing/exporters/langfuse.mdx +18 -17
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +14 -14
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +56 -46
- package/.docs/raw/observability/ai-tracing/overview.mdx +145 -122
- package/.docs/raw/observability/ai-tracing/processors/sensitive-data-filter.mdx +57 -36
- package/.docs/raw/observability/logging.mdx +14 -17
- package/.docs/raw/observability/nextjs-tracing.mdx +5 -5
- package/.docs/raw/observability/otel-tracing.mdx +17 -18
- package/.docs/raw/observability/overview.mdx +14 -10
- package/.docs/raw/rag/chunking-and-embedding.mdx +10 -8
- package/.docs/raw/rag/overview.mdx +5 -5
- package/.docs/raw/rag/retrieval.mdx +125 -107
- package/.docs/raw/rag/vector-databases.mdx +232 -223
- package/.docs/raw/reference/agents/agent.mdx +54 -41
- package/.docs/raw/reference/agents/generate.mdx +367 -261
- package/.docs/raw/reference/agents/generateLegacy.mdx +260 -178
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +7 -5
- package/.docs/raw/reference/agents/getDescription.mdx +3 -2
- package/.docs/raw/reference/agents/getInstructions.mdx +7 -5
- package/.docs/raw/reference/agents/getLLM.mdx +11 -7
- package/.docs/raw/reference/agents/getMemory.mdx +7 -5
- package/.docs/raw/reference/agents/getModel.mdx +7 -5
- package/.docs/raw/reference/agents/getScorers.mdx +7 -5
- package/.docs/raw/reference/agents/getTools.mdx +7 -5
- package/.docs/raw/reference/agents/getVoice.mdx +7 -5
- package/.docs/raw/reference/agents/getWorkflows.mdx +7 -5
- package/.docs/raw/reference/agents/listAgents.mdx +6 -4
- package/.docs/raw/reference/agents/network.mdx +171 -116
- package/.docs/raw/reference/auth/auth0.mdx +18 -14
- package/.docs/raw/reference/auth/clerk.mdx +15 -12
- package/.docs/raw/reference/auth/firebase.mdx +23 -16
- package/.docs/raw/reference/auth/jwt.mdx +7 -6
- package/.docs/raw/reference/auth/supabase.mdx +13 -10
- package/.docs/raw/reference/auth/workos.mdx +17 -13
- package/.docs/raw/reference/cli/create-mastra.mdx +61 -44
- package/.docs/raw/reference/cli/mastra.mdx +11 -11
- package/.docs/raw/reference/client-js/agents.mdx +40 -43
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -1
- package/.docs/raw/reference/client-js/logs.mdx +1 -1
- package/.docs/raw/reference/client-js/mastra-client.mdx +21 -13
- package/.docs/raw/reference/client-js/memory.mdx +6 -2
- package/.docs/raw/reference/client-js/observability.mdx +10 -9
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -1
- package/.docs/raw/reference/client-js/tools.mdx +1 -1
- package/.docs/raw/reference/client-js/vectors.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +1 -1
- package/.docs/raw/reference/client-js/workflows.mdx +7 -7
- package/.docs/raw/reference/core/getAgent.mdx +6 -5
- package/.docs/raw/reference/core/getAgentById.mdx +5 -4
- package/.docs/raw/reference/core/getAgents.mdx +3 -3
- package/.docs/raw/reference/core/getDeployer.mdx +4 -3
- package/.docs/raw/reference/core/getLogger.mdx +4 -3
- package/.docs/raw/reference/core/getLogs.mdx +10 -6
- package/.docs/raw/reference/core/getLogsByRunId.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServer.mdx +8 -5
- package/.docs/raw/reference/core/getMCPServers.mdx +4 -3
- package/.docs/raw/reference/core/getMemory.mdx +4 -3
- package/.docs/raw/reference/core/getScorer.mdx +15 -12
- package/.docs/raw/reference/core/getScorerByName.mdx +12 -9
- package/.docs/raw/reference/core/getScorers.mdx +3 -2
- package/.docs/raw/reference/core/getServer.mdx +4 -3
- package/.docs/raw/reference/core/getStorage.mdx +4 -3
- package/.docs/raw/reference/core/getTelemetry.mdx +4 -3
- package/.docs/raw/reference/core/getVector.mdx +6 -4
- package/.docs/raw/reference/core/getVectors.mdx +4 -3
- package/.docs/raw/reference/core/getWorkflow.mdx +7 -4
- package/.docs/raw/reference/core/getWorkflows.mdx +5 -3
- package/.docs/raw/reference/core/mastra-class.mdx +16 -16
- package/.docs/raw/reference/core/setLogger.mdx +6 -4
- package/.docs/raw/reference/core/setStorage.mdx +4 -4
- package/.docs/raw/reference/core/setTelemetry.mdx +4 -3
- package/.docs/raw/reference/deployer/cloudflare.mdx +11 -7
- package/.docs/raw/reference/deployer/deployer.mdx +2 -1
- package/.docs/raw/reference/deployer/netlify.mdx +4 -4
- package/.docs/raw/reference/deployer/vercel.mdx +6 -6
- package/.docs/raw/reference/evals/answer-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/bias.mdx +4 -5
- package/.docs/raw/reference/evals/completeness.mdx +4 -5
- package/.docs/raw/reference/evals/content-similarity.mdx +4 -5
- package/.docs/raw/reference/evals/context-position.mdx +4 -5
- package/.docs/raw/reference/evals/context-precision.mdx +4 -5
- package/.docs/raw/reference/evals/context-relevancy.mdx +4 -5
- package/.docs/raw/reference/evals/contextual-recall.mdx +4 -5
- package/.docs/raw/reference/evals/faithfulness.mdx +4 -5
- package/.docs/raw/reference/evals/hallucination.mdx +4 -6
- package/.docs/raw/reference/evals/keyword-coverage.mdx +4 -5
- package/.docs/raw/reference/evals/prompt-alignment.mdx +4 -6
- package/.docs/raw/reference/evals/summarization.mdx +4 -6
- package/.docs/raw/reference/evals/textual-difference.mdx +4 -5
- package/.docs/raw/reference/evals/tone-consistency.mdx +4 -7
- package/.docs/raw/reference/evals/toxicity.mdx +4 -5
- package/.docs/raw/reference/index.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/after.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/afterEvent.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/commit.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/createRun.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/else.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/events.mdx +7 -7
- package/.docs/raw/reference/legacyWorkflows/execute.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/if.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resume.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/resumeWithEvent.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/snapshots.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/start.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-class.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-condition.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/step-function.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/step-options.mdx +6 -6
- package/.docs/raw/reference/legacyWorkflows/step-retries.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/suspend.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/then.mdx +5 -5
- package/.docs/raw/reference/legacyWorkflows/until.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/watch.mdx +3 -3
- package/.docs/raw/reference/legacyWorkflows/while.mdx +4 -4
- package/.docs/raw/reference/legacyWorkflows/workflow.mdx +7 -7
- package/.docs/raw/reference/memory/createThread.mdx +10 -10
- package/.docs/raw/reference/memory/deleteMessages.mdx +7 -5
- package/.docs/raw/reference/memory/getThreadById.mdx +6 -5
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +11 -10
- package/.docs/raw/reference/memory/getThreadsByResourceIdPaginated.mdx +10 -9
- package/.docs/raw/reference/memory/{Memory.mdx → memory-class.mdx} +53 -46
- package/.docs/raw/reference/memory/query.mdx +39 -25
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +9 -5
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +26 -22
- package/.docs/raw/reference/observability/ai-tracing/exporters/arize.mdx +16 -12
- package/.docs/raw/reference/observability/ai-tracing/exporters/braintrust.mdx +14 -14
- package/.docs/raw/reference/observability/ai-tracing/exporters/cloud-exporter.mdx +16 -11
- package/.docs/raw/reference/observability/ai-tracing/exporters/console-exporter.mdx +10 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/default-exporter.mdx +23 -15
- package/.docs/raw/reference/observability/ai-tracing/exporters/langfuse.mdx +6 -6
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +13 -13
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +44 -36
- package/.docs/raw/reference/observability/ai-tracing/interfaces.mdx +115 -106
- package/.docs/raw/reference/observability/ai-tracing/processors/sensitive-data-filter.mdx +37 -32
- package/.docs/raw/reference/observability/ai-tracing/span.mdx +29 -26
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +13 -15
- package/.docs/raw/reference/observability/otel-tracing/otel-config.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-ax.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/arize-phoenix.mdx +2 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/dash0.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +12 -14
- package/.docs/raw/reference/observability/otel-tracing/providers/keywordsai.mdx +3 -3
- package/.docs/raw/reference/observability/otel-tracing/providers/laminar.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langfuse.mdx +4 -4
- package/.docs/raw/reference/observability/otel-tracing/providers/langsmith.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/langwatch.mdx +2 -2
- package/.docs/raw/reference/observability/otel-tracing/providers/new-relic.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/signoz.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/traceloop.mdx +1 -1
- package/.docs/raw/reference/processors/batch-parts-processor.mdx +13 -10
- package/.docs/raw/reference/processors/language-detector.mdx +27 -17
- package/.docs/raw/reference/processors/moderation-processor.mdx +26 -17
- package/.docs/raw/reference/processors/pii-detector.mdx +28 -18
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +25 -17
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +26 -17
- package/.docs/raw/reference/processors/token-limiter-processor.mdx +22 -15
- package/.docs/raw/reference/processors/unicode-normalizer.mdx +13 -12
- package/.docs/raw/reference/rag/chunk.mdx +41 -23
- package/.docs/raw/reference/rag/database-config.mdx +56 -38
- package/.docs/raw/reference/rag/document.mdx +1 -1
- package/.docs/raw/reference/rag/embeddings.mdx +1 -1
- package/.docs/raw/reference/rag/extract-params.mdx +1 -1
- package/.docs/raw/reference/rag/graph-rag.mdx +1 -1
- package/.docs/raw/reference/rag/metadata-filters.mdx +23 -26
- package/.docs/raw/reference/rag/rerank.mdx +1 -1
- package/.docs/raw/reference/rag/rerankWithScorer.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +35 -17
- package/.docs/raw/reference/scorers/answer-similarity.mdx +84 -68
- package/.docs/raw/reference/scorers/bias.mdx +22 -19
- package/.docs/raw/reference/scorers/completeness.mdx +21 -16
- package/.docs/raw/reference/scorers/content-similarity.mdx +12 -10
- package/.docs/raw/reference/scorers/context-precision.mdx +73 -64
- package/.docs/raw/reference/scorers/context-relevance.mdx +142 -126
- package/.docs/raw/reference/scorers/create-scorer.mdx +93 -61
- package/.docs/raw/reference/scorers/faithfulness.mdx +21 -13
- package/.docs/raw/reference/scorers/hallucination.mdx +17 -12
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +32 -27
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +39 -33
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +203 -152
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +186 -132
- package/.docs/raw/reference/scorers/run-experiment.mdx +40 -31
- package/.docs/raw/reference/scorers/textual-difference.mdx +25 -26
- package/.docs/raw/reference/scorers/tone-consistency.mdx +29 -26
- package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +180 -176
- package/.docs/raw/reference/scorers/toxicity.mdx +35 -31
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +1 -1
- package/.docs/raw/reference/storage/cloudflare.mdx +1 -1
- package/.docs/raw/reference/storage/dynamodb.mdx +1 -1
- package/.docs/raw/reference/storage/lance.mdx +1 -1
- package/.docs/raw/reference/storage/libsql.mdx +2 -2
- package/.docs/raw/reference/storage/mongodb.mdx +4 -5
- package/.docs/raw/reference/storage/mssql.mdx +5 -4
- package/.docs/raw/reference/storage/postgresql.mdx +35 -33
- package/.docs/raw/reference/storage/upstash.mdx +6 -5
- package/.docs/raw/reference/streaming/ChunkType.mdx +788 -314
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +265 -109
- package/.docs/raw/reference/streaming/agents/stream.mdx +375 -266
- package/.docs/raw/reference/streaming/agents/streamLegacy.mdx +233 -162
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +4 -4
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +5 -5
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +24 -20
- package/.docs/raw/reference/streaming/workflows/stream.mdx +35 -26
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +36 -27
- package/.docs/raw/reference/templates/overview.mdx +16 -39
- package/.docs/raw/reference/tools/client.mdx +1 -1
- package/.docs/raw/reference/tools/create-tool.mdx +45 -35
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +2 -2
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +12 -12
- package/.docs/raw/reference/tools/mcp-client.mdx +70 -64
- package/.docs/raw/reference/tools/mcp-server.mdx +91 -78
- package/.docs/raw/reference/tools/vector-query-tool.mdx +48 -38
- package/.docs/raw/reference/vectors/astra.mdx +1 -1
- package/.docs/raw/reference/vectors/chroma.mdx +25 -19
- package/.docs/raw/reference/vectors/couchbase.mdx +4 -4
- package/.docs/raw/reference/vectors/lance.mdx +5 -6
- package/.docs/raw/reference/vectors/libsql.mdx +1 -1
- package/.docs/raw/reference/vectors/mongodb.mdx +1 -1
- package/.docs/raw/reference/vectors/opensearch.mdx +1 -1
- package/.docs/raw/reference/vectors/pg.mdx +8 -4
- package/.docs/raw/reference/vectors/pinecone.mdx +1 -1
- package/.docs/raw/reference/vectors/qdrant.mdx +1 -1
- package/.docs/raw/reference/vectors/s3vectors.mdx +35 -27
- package/.docs/raw/reference/vectors/turbopuffer.mdx +1 -1
- package/.docs/raw/reference/vectors/upstash.mdx +33 -25
- package/.docs/raw/reference/vectors/vectorize.mdx +1 -1
- package/.docs/raw/reference/voice/azure.mdx +1 -1
- package/.docs/raw/reference/voice/cloudflare.mdx +1 -1
- package/.docs/raw/reference/voice/composite-voice.mdx +1 -1
- package/.docs/raw/reference/voice/deepgram.mdx +1 -1
- package/.docs/raw/reference/voice/elevenlabs.mdx +1 -1
- package/.docs/raw/reference/voice/google-gemini-live.mdx +6 -4
- package/.docs/raw/reference/voice/google.mdx +1 -1
- package/.docs/raw/reference/voice/mastra-voice.mdx +1 -1
- package/.docs/raw/reference/voice/murf.mdx +1 -1
- package/.docs/raw/reference/voice/openai-realtime.mdx +1 -1
- package/.docs/raw/reference/voice/openai.mdx +1 -1
- package/.docs/raw/reference/voice/playai.mdx +1 -1
- package/.docs/raw/reference/voice/sarvam.mdx +1 -1
- package/.docs/raw/reference/voice/speechify.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addInstructions.mdx +1 -1
- package/.docs/raw/reference/voice/voice.addTools.mdx +1 -1
- package/.docs/raw/reference/voice/voice.answer.mdx +1 -1
- package/.docs/raw/reference/voice/voice.close.mdx +1 -1
- package/.docs/raw/reference/voice/voice.connect.mdx +1 -1
- package/.docs/raw/reference/voice/voice.events.mdx +1 -1
- package/.docs/raw/reference/voice/voice.getSpeakers.mdx +23 -30
- package/.docs/raw/reference/voice/voice.listen.mdx +1 -1
- package/.docs/raw/reference/voice/voice.off.mdx +1 -1
- package/.docs/raw/reference/voice/voice.on.mdx +1 -1
- package/.docs/raw/reference/voice/voice.send.mdx +1 -1
- package/.docs/raw/reference/voice/voice.speak.mdx +1 -1
- package/.docs/raw/reference/voice/voice.updateConfig.mdx +1 -1
- package/.docs/raw/reference/workflows/run-methods/cancel.mdx +4 -3
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +49 -34
- package/.docs/raw/reference/workflows/run-methods/start.mdx +43 -31
- package/.docs/raw/reference/workflows/run-methods/watch.mdx +7 -8
- package/.docs/raw/reference/workflows/run.mdx +7 -10
- package/.docs/raw/reference/workflows/step.mdx +15 -12
- package/.docs/raw/reference/workflows/workflow-methods/branch.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/commit.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/create-run.mdx +7 -7
- package/.docs/raw/reference/workflows/workflow-methods/dountil.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/dowhile.mdx +2 -2
- package/.docs/raw/reference/workflows/workflow-methods/foreach.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/map.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/parallel.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/sendEvent.mdx +5 -5
- package/.docs/raw/reference/workflows/workflow-methods/sleep.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/sleepUntil.mdx +3 -2
- package/.docs/raw/reference/workflows/workflow-methods/then.mdx +1 -1
- package/.docs/raw/reference/workflows/workflow-methods/waitForEvent.mdx +3 -3
- package/.docs/raw/reference/workflows/workflow.mdx +13 -10
- package/.docs/raw/scorers/custom-scorers.mdx +58 -48
- package/.docs/raw/scorers/evals-old-api/custom-eval.mdx +6 -8
- package/.docs/raw/scorers/evals-old-api/overview.mdx +8 -8
- package/.docs/raw/scorers/evals-old-api/running-in-ci.mdx +9 -9
- package/.docs/raw/scorers/evals-old-api/textual-evals.mdx +5 -5
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +3 -1
- package/.docs/raw/scorers/overview.mdx +20 -19
- package/.docs/raw/server-db/custom-api-routes.mdx +8 -8
- package/.docs/raw/server-db/mastra-client.mdx +56 -54
- package/.docs/raw/server-db/middleware.mdx +11 -7
- package/.docs/raw/server-db/production-server.mdx +5 -7
- package/.docs/raw/server-db/runtime-context.mdx +41 -38
- package/.docs/raw/server-db/storage.mdx +82 -87
- package/.docs/raw/streaming/events.mdx +24 -16
- package/.docs/raw/streaming/overview.mdx +17 -18
- package/.docs/raw/streaming/tool-streaming.mdx +9 -10
- package/.docs/raw/streaming/workflow-streaming.mdx +14 -12
- package/.docs/raw/tools-mcp/advanced-usage.mdx +2 -2
- package/.docs/raw/tools-mcp/mcp-overview.mdx +92 -102
- package/.docs/raw/tools-mcp/overview.mdx +9 -14
- package/.docs/raw/voice/overview.mdx +273 -250
- package/.docs/raw/voice/speech-to-speech.mdx +14 -12
- package/.docs/raw/voice/speech-to-text.mdx +2 -2
- package/.docs/raw/voice/text-to-speech.mdx +2 -2
- package/.docs/raw/workflows/agents-and-tools.mdx +29 -28
- package/.docs/raw/workflows/control-flow.mdx +24 -24
- package/.docs/raw/workflows/error-handling.mdx +15 -17
- package/.docs/raw/workflows/human-in-the-loop.mdx +39 -39
- package/.docs/raw/workflows/inngest-workflow.mdx +33 -29
- package/.docs/raw/workflows/input-data-mapping.mdx +9 -9
- package/.docs/raw/workflows/overview.mdx +60 -60
- package/.docs/raw/workflows/snapshots.mdx +54 -36
- package/.docs/raw/workflows/suspend-and-resume.mdx +52 -57
- package/.docs/raw/workflows-legacy/control-flow.mdx +15 -17
- package/.docs/raw/workflows-legacy/dynamic-workflows.mdx +3 -1
- package/.docs/raw/workflows-legacy/error-handling.mdx +8 -6
- package/.docs/raw/workflows-legacy/nested-workflows.mdx +6 -0
- package/.docs/raw/workflows-legacy/overview.mdx +28 -26
- package/.docs/raw/workflows-legacy/runtime-variables.mdx +4 -2
- package/.docs/raw/workflows-legacy/steps.mdx +5 -3
- package/.docs/raw/workflows-legacy/suspend-and-resume.mdx +10 -8
- package/.docs/raw/workflows-legacy/variables.mdx +10 -8
- package/CHANGELOG.md +14 -0
- package/package.json +5 -5
- package/.docs/raw/memory/storage/memory-with-mongodb.mdx +0 -148
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: "Reference: Tool Call Accuracy | Scorers | Mastra Docs"
|
|
2
|
+
title: "Reference: Tool Call Accuracy Scorers | Scorers | Mastra Docs"
|
|
3
3
|
description: Documentation for the Tool Call Accuracy Scorers in Mastra, which evaluate whether LLM outputs call the correct tools from available options.
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -39,20 +39,23 @@ The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/co
|
|
|
39
39
|
{
|
|
40
40
|
name: "expectedTool",
|
|
41
41
|
type: "string",
|
|
42
|
-
description:
|
|
42
|
+
description:
|
|
43
|
+
"The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.",
|
|
43
44
|
required: false,
|
|
44
45
|
},
|
|
45
46
|
{
|
|
46
47
|
name: "strictMode",
|
|
47
48
|
type: "boolean",
|
|
48
|
-
description:
|
|
49
|
+
description:
|
|
50
|
+
"Controls evaluation strictness. For single tool mode: only exact single tool calls accepted. For order checking mode: tools must match exactly with no extra tools allowed.",
|
|
49
51
|
required: false,
|
|
50
52
|
default: "false",
|
|
51
53
|
},
|
|
52
54
|
{
|
|
53
55
|
name: "expectedToolOrder",
|
|
54
56
|
type: "string[]",
|
|
55
|
-
description:
|
|
57
|
+
description:
|
|
58
|
+
"Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.",
|
|
56
59
|
required: false,
|
|
57
60
|
},
|
|
58
61
|
]}
|
|
@@ -88,22 +91,22 @@ When `expectedToolOrder` is provided, the scorer validates tool calling sequence
|
|
|
88
91
|
|
|
89
92
|
```typescript showLineNumbers copy
|
|
90
93
|
// Standard mode - passes if expected tool is called
|
|
91
|
-
const lenientScorer = createCodeScorer({
|
|
92
|
-
expectedTool:
|
|
93
|
-
strictMode: false
|
|
94
|
+
const lenientScorer = createCodeScorer({
|
|
95
|
+
expectedTool: "search-tool",
|
|
96
|
+
strictMode: false,
|
|
94
97
|
});
|
|
95
98
|
|
|
96
99
|
// Strict mode - only passes if exactly one tool is called
|
|
97
|
-
const strictScorer = createCodeScorer({
|
|
98
|
-
expectedTool:
|
|
99
|
-
strictMode: true
|
|
100
|
+
const strictScorer = createCodeScorer({
|
|
101
|
+
expectedTool: "search-tool",
|
|
102
|
+
strictMode: true,
|
|
100
103
|
});
|
|
101
104
|
|
|
102
105
|
// Order checking with strict mode
|
|
103
106
|
const strictOrderScorer = createCodeScorer({
|
|
104
|
-
expectedTool:
|
|
105
|
-
expectedToolOrder: [
|
|
106
|
-
strictMode: true // no extra tools allowed
|
|
107
|
+
expectedTool: "step1-tool",
|
|
108
|
+
expectedToolOrder: ["step1-tool", "step2-tool", "step3-tool"],
|
|
109
|
+
strictMode: true, // no extra tools allowed
|
|
107
110
|
});
|
|
108
111
|
```
|
|
109
112
|
|
|
@@ -132,35 +135,35 @@ The code-based scorer provides deterministic, binary scoring (0 or 1) based on e
|
|
|
132
135
|
|
|
133
136
|
### Correct tool selection
|
|
134
137
|
|
|
135
|
-
```typescript
|
|
136
|
-
const scorer = createToolCallAccuracyScorerCode({
|
|
137
|
-
expectedTool:
|
|
138
|
+
```typescript title="src/example-correct-tool.ts" showLineNumbers copy
|
|
139
|
+
const scorer = createToolCallAccuracyScorerCode({
|
|
140
|
+
expectedTool: "weather-tool",
|
|
138
141
|
});
|
|
139
142
|
|
|
140
143
|
// Simulate LLM input and output with tool call
|
|
141
144
|
const inputMessages = [
|
|
142
|
-
createUIMessage({
|
|
143
|
-
content:
|
|
144
|
-
role:
|
|
145
|
-
id:
|
|
146
|
-
})
|
|
145
|
+
createUIMessage({
|
|
146
|
+
content: "What is the weather like in New York today?",
|
|
147
|
+
role: "user",
|
|
148
|
+
id: "input-1",
|
|
149
|
+
}),
|
|
147
150
|
];
|
|
148
151
|
|
|
149
152
|
const output = [
|
|
150
153
|
createUIMessage({
|
|
151
|
-
content:
|
|
152
|
-
role:
|
|
153
|
-
id:
|
|
154
|
+
content: "Let me check the weather for you.",
|
|
155
|
+
role: "assistant",
|
|
156
|
+
id: "output-1",
|
|
154
157
|
toolInvocations: [
|
|
155
158
|
createToolInvocation({
|
|
156
|
-
toolCallId:
|
|
157
|
-
toolName:
|
|
158
|
-
args: { location:
|
|
159
|
-
result: { temperature:
|
|
160
|
-
state:
|
|
161
|
-
})
|
|
162
|
-
]
|
|
163
|
-
})
|
|
159
|
+
toolCallId: "call-123",
|
|
160
|
+
toolName: "weather-tool",
|
|
161
|
+
args: { location: "New York" },
|
|
162
|
+
result: { temperature: "72°F", condition: "sunny" },
|
|
163
|
+
state: "result",
|
|
164
|
+
}),
|
|
165
|
+
],
|
|
166
|
+
}),
|
|
164
167
|
];
|
|
165
168
|
|
|
166
169
|
const run = createAgentTestRun({ inputMessages, output });
|
|
@@ -174,35 +177,35 @@ console.log(result.preprocessStepResult?.correctToolCalled); // true
|
|
|
174
177
|
|
|
175
178
|
Only passes if exactly one tool is called:
|
|
176
179
|
|
|
177
|
-
```typescript
|
|
178
|
-
const strictScorer = createToolCallAccuracyScorerCode({
|
|
179
|
-
expectedTool:
|
|
180
|
-
strictMode: true
|
|
180
|
+
```typescript title="src/example-strict-mode.ts" showLineNumbers copy
|
|
181
|
+
const strictScorer = createToolCallAccuracyScorerCode({
|
|
182
|
+
expectedTool: "weather-tool",
|
|
183
|
+
strictMode: true,
|
|
181
184
|
});
|
|
182
185
|
|
|
183
186
|
// Multiple tools called - fails in strict mode
|
|
184
187
|
const output = [
|
|
185
188
|
createUIMessage({
|
|
186
|
-
content:
|
|
187
|
-
role:
|
|
188
|
-
id:
|
|
189
|
+
content: "Let me help you with that.",
|
|
190
|
+
role: "assistant",
|
|
191
|
+
id: "output-1",
|
|
189
192
|
toolInvocations: [
|
|
190
193
|
createToolInvocation({
|
|
191
|
-
toolCallId:
|
|
192
|
-
toolName:
|
|
194
|
+
toolCallId: "call-1",
|
|
195
|
+
toolName: "search-tool",
|
|
193
196
|
args: {},
|
|
194
197
|
result: {},
|
|
195
|
-
state:
|
|
198
|
+
state: "result",
|
|
196
199
|
}),
|
|
197
200
|
createToolInvocation({
|
|
198
|
-
toolCallId:
|
|
199
|
-
toolName:
|
|
200
|
-
args: { location:
|
|
201
|
-
result: { temperature:
|
|
202
|
-
state:
|
|
203
|
-
})
|
|
204
|
-
]
|
|
205
|
-
})
|
|
201
|
+
toolCallId: "call-2",
|
|
202
|
+
toolName: "weather-tool",
|
|
203
|
+
args: { location: "New York" },
|
|
204
|
+
result: { temperature: "20°C" },
|
|
205
|
+
state: "result",
|
|
206
|
+
}),
|
|
207
|
+
],
|
|
208
|
+
}),
|
|
206
209
|
];
|
|
207
210
|
|
|
208
211
|
const result = await strictScorer.run(run);
|
|
@@ -213,35 +216,35 @@ console.log(result.score); // 0 - fails because multiple tools were called
|
|
|
213
216
|
|
|
214
217
|
Validates that tools are called in a specific sequence:
|
|
215
218
|
|
|
216
|
-
```typescript
|
|
219
|
+
```typescript title="src/example-order-validation.ts" showLineNumbers copy
|
|
217
220
|
const orderScorer = createToolCallAccuracyScorerCode({
|
|
218
|
-
expectedTool:
|
|
219
|
-
expectedToolOrder: [
|
|
220
|
-
strictMode: true // no extra tools allowed
|
|
221
|
+
expectedTool: "auth-tool", // ignored when order is specified
|
|
222
|
+
expectedToolOrder: ["auth-tool", "fetch-tool"],
|
|
223
|
+
strictMode: true, // no extra tools allowed
|
|
221
224
|
});
|
|
222
225
|
|
|
223
226
|
const output = [
|
|
224
227
|
createUIMessage({
|
|
225
|
-
content:
|
|
226
|
-
role:
|
|
227
|
-
id:
|
|
228
|
+
content: "I will authenticate and fetch the data.",
|
|
229
|
+
role: "assistant",
|
|
230
|
+
id: "output-1",
|
|
228
231
|
toolInvocations: [
|
|
229
232
|
createToolInvocation({
|
|
230
|
-
toolCallId:
|
|
231
|
-
toolName:
|
|
232
|
-
args: { token:
|
|
233
|
+
toolCallId: "call-1",
|
|
234
|
+
toolName: "auth-tool",
|
|
235
|
+
args: { token: "abc123" },
|
|
233
236
|
result: { authenticated: true },
|
|
234
|
-
state:
|
|
237
|
+
state: "result",
|
|
235
238
|
}),
|
|
236
239
|
createToolInvocation({
|
|
237
|
-
toolCallId:
|
|
238
|
-
toolName:
|
|
239
|
-
args: { endpoint:
|
|
240
|
-
result: { data: [
|
|
241
|
-
state:
|
|
242
|
-
})
|
|
243
|
-
]
|
|
244
|
-
})
|
|
240
|
+
toolCallId: "call-2",
|
|
241
|
+
toolName: "fetch-tool",
|
|
242
|
+
args: { endpoint: "/data" },
|
|
243
|
+
result: { data: ["item1"] },
|
|
244
|
+
state: "result",
|
|
245
|
+
}),
|
|
246
|
+
],
|
|
247
|
+
}),
|
|
245
248
|
];
|
|
246
249
|
|
|
247
250
|
const result = await orderScorer.run(run);
|
|
@@ -252,42 +255,42 @@ console.log(result.score); // 1 - correct order
|
|
|
252
255
|
|
|
253
256
|
Allows extra tools as long as expected tools maintain relative order:
|
|
254
257
|
|
|
255
|
-
```typescript
|
|
258
|
+
```typescript title="src/example-flexible-order.ts" showLineNumbers copy
|
|
256
259
|
const flexibleOrderScorer = createToolCallAccuracyScorerCode({
|
|
257
|
-
expectedTool:
|
|
258
|
-
expectedToolOrder: [
|
|
259
|
-
strictMode: false // allows extra tools
|
|
260
|
+
expectedTool: "auth-tool",
|
|
261
|
+
expectedToolOrder: ["auth-tool", "fetch-tool"],
|
|
262
|
+
strictMode: false, // allows extra tools
|
|
260
263
|
});
|
|
261
264
|
|
|
262
265
|
const output = [
|
|
263
266
|
createUIMessage({
|
|
264
|
-
content:
|
|
265
|
-
role:
|
|
266
|
-
id:
|
|
267
|
+
content: "Performing comprehensive operation.",
|
|
268
|
+
role: "assistant",
|
|
269
|
+
id: "output-1",
|
|
267
270
|
toolInvocations: [
|
|
268
271
|
createToolInvocation({
|
|
269
|
-
toolCallId:
|
|
270
|
-
toolName:
|
|
271
|
-
args: { token:
|
|
272
|
+
toolCallId: "call-1",
|
|
273
|
+
toolName: "auth-tool",
|
|
274
|
+
args: { token: "abc123" },
|
|
272
275
|
result: { authenticated: true },
|
|
273
|
-
state:
|
|
276
|
+
state: "result",
|
|
274
277
|
}),
|
|
275
278
|
createToolInvocation({
|
|
276
|
-
toolCallId:
|
|
277
|
-
toolName:
|
|
278
|
-
args: { message:
|
|
279
|
+
toolCallId: "call-2",
|
|
280
|
+
toolName: "log-tool", // Extra tool - OK in flexible mode
|
|
281
|
+
args: { message: "Starting fetch" },
|
|
279
282
|
result: { logged: true },
|
|
280
|
-
state:
|
|
283
|
+
state: "result",
|
|
281
284
|
}),
|
|
282
285
|
createToolInvocation({
|
|
283
|
-
toolCallId:
|
|
284
|
-
toolName:
|
|
285
|
-
args: { endpoint:
|
|
286
|
-
result: { data: [
|
|
287
|
-
state:
|
|
288
|
-
})
|
|
289
|
-
]
|
|
290
|
-
})
|
|
286
|
+
toolCallId: "call-3",
|
|
287
|
+
toolName: "fetch-tool",
|
|
288
|
+
args: { endpoint: "/data" },
|
|
289
|
+
result: { data: ["item1"] },
|
|
290
|
+
state: "result",
|
|
291
|
+
}),
|
|
292
|
+
],
|
|
293
|
+
}),
|
|
291
294
|
];
|
|
292
295
|
|
|
293
296
|
const result = await flexibleOrderScorer.run(run);
|
|
@@ -311,7 +314,8 @@ The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/llm
|
|
|
311
314
|
{
|
|
312
315
|
name: "availableTools",
|
|
313
316
|
type: "Array<{name: string, description: string}>",
|
|
314
|
-
description:
|
|
317
|
+
description:
|
|
318
|
+
"List of available tools with their descriptions for context",
|
|
315
319
|
required: true,
|
|
316
320
|
},
|
|
317
321
|
]}
|
|
@@ -383,48 +387,48 @@ The LLM-based scorer uses AI to evaluate whether tool selections are appropriate
|
|
|
383
387
|
|
|
384
388
|
### Basic LLM evaluation
|
|
385
389
|
|
|
386
|
-
```typescript
|
|
390
|
+
```typescript title="src/example-llm-basic.ts" showLineNumbers copy
|
|
387
391
|
const llmScorer = createToolCallAccuracyScorerLLM({
|
|
388
|
-
model:
|
|
392
|
+
model: "openai/gpt-4o-mini",
|
|
389
393
|
availableTools: [
|
|
390
|
-
{
|
|
391
|
-
name:
|
|
392
|
-
description:
|
|
394
|
+
{
|
|
395
|
+
name: "weather-tool",
|
|
396
|
+
description: "Get current weather information for any location",
|
|
393
397
|
},
|
|
394
|
-
{
|
|
395
|
-
name:
|
|
396
|
-
description:
|
|
398
|
+
{
|
|
399
|
+
name: "calendar-tool",
|
|
400
|
+
description: "Check calendar events and scheduling",
|
|
397
401
|
},
|
|
398
|
-
{
|
|
399
|
-
name:
|
|
400
|
-
description:
|
|
401
|
-
}
|
|
402
|
-
]
|
|
402
|
+
{
|
|
403
|
+
name: "search-tool",
|
|
404
|
+
description: "Search the web for general information",
|
|
405
|
+
},
|
|
406
|
+
],
|
|
403
407
|
});
|
|
404
408
|
|
|
405
409
|
const inputMessages = [
|
|
406
|
-
createUIMessage({
|
|
407
|
-
content:
|
|
408
|
-
role:
|
|
409
|
-
id:
|
|
410
|
-
})
|
|
410
|
+
createUIMessage({
|
|
411
|
+
content: "What is the weather like in San Francisco today?",
|
|
412
|
+
role: "user",
|
|
413
|
+
id: "input-1",
|
|
414
|
+
}),
|
|
411
415
|
];
|
|
412
416
|
|
|
413
417
|
const output = [
|
|
414
418
|
createUIMessage({
|
|
415
|
-
content:
|
|
416
|
-
role:
|
|
417
|
-
id:
|
|
419
|
+
content: "Let me check the current weather for you.",
|
|
420
|
+
role: "assistant",
|
|
421
|
+
id: "output-1",
|
|
418
422
|
toolInvocations: [
|
|
419
423
|
createToolInvocation({
|
|
420
|
-
toolCallId:
|
|
421
|
-
toolName:
|
|
422
|
-
args: { location:
|
|
423
|
-
result: { temperature:
|
|
424
|
-
state:
|
|
425
|
-
})
|
|
426
|
-
]
|
|
427
|
-
})
|
|
424
|
+
toolCallId: "call-123",
|
|
425
|
+
toolName: "weather-tool",
|
|
426
|
+
args: { location: "San Francisco", date: "today" },
|
|
427
|
+
result: { temperature: "68°F", condition: "foggy" },
|
|
428
|
+
state: "result",
|
|
429
|
+
}),
|
|
430
|
+
],
|
|
431
|
+
}),
|
|
428
432
|
];
|
|
429
433
|
|
|
430
434
|
const run = createAgentTestRun({ inputMessages, output });
|
|
@@ -436,30 +440,30 @@ console.log(result.reason); // "The agent correctly used the weather-tool to add
|
|
|
436
440
|
|
|
437
441
|
### Handling inappropriate tool usage
|
|
438
442
|
|
|
439
|
-
```typescript
|
|
443
|
+
```typescript title="src/example-llm-inappropriate.ts" showLineNumbers copy
|
|
440
444
|
const inputMessages = [
|
|
441
|
-
createUIMessage({
|
|
442
|
-
content:
|
|
443
|
-
role:
|
|
444
|
-
id:
|
|
445
|
-
})
|
|
445
|
+
createUIMessage({
|
|
446
|
+
content: "What is the weather in Tokyo?",
|
|
447
|
+
role: "user",
|
|
448
|
+
id: "input-1",
|
|
449
|
+
}),
|
|
446
450
|
];
|
|
447
451
|
|
|
448
452
|
const inappropriateOutput = [
|
|
449
453
|
createUIMessage({
|
|
450
|
-
content:
|
|
451
|
-
role:
|
|
452
|
-
id:
|
|
454
|
+
content: "Let me search for that information.",
|
|
455
|
+
role: "assistant",
|
|
456
|
+
id: "output-1",
|
|
453
457
|
toolInvocations: [
|
|
454
458
|
createToolInvocation({
|
|
455
|
-
toolCallId:
|
|
456
|
-
toolName:
|
|
457
|
-
args: { query:
|
|
458
|
-
result: { results: [
|
|
459
|
-
state:
|
|
460
|
-
})
|
|
461
|
-
]
|
|
462
|
-
})
|
|
459
|
+
toolCallId: "call-456",
|
|
460
|
+
toolName: "search-tool", // Less appropriate than weather-tool
|
|
461
|
+
args: { query: "Tokyo weather" },
|
|
462
|
+
result: { results: ["Tokyo weather data..."] },
|
|
463
|
+
state: "result",
|
|
464
|
+
}),
|
|
465
|
+
],
|
|
466
|
+
}),
|
|
463
467
|
];
|
|
464
468
|
|
|
465
469
|
const run = createAgentTestRun({ inputMessages, output: inappropriateOutput });
|
|
@@ -473,12 +477,12 @@ console.log(result.reason); // "The agent used search-tool when weather-tool wou
|
|
|
473
477
|
|
|
474
478
|
The LLM scorer recognizes when agents appropriately ask for clarification:
|
|
475
479
|
|
|
476
|
-
```typescript
|
|
480
|
+
```typescript title="src/example-llm-clarification.ts" showLineNumbers copy
|
|
477
481
|
const vagueInput = [
|
|
478
|
-
createUIMessage({
|
|
479
|
-
content: 'I need help with something',
|
|
480
|
-
role: 'user',
|
|
481
|
-
id: 'input-1'
|
|
482
|
+
createUIMessage({
|
|
483
|
+
content: 'I need help with something',
|
|
484
|
+
role: 'user',
|
|
485
|
+
id: 'input-1'
|
|
482
486
|
})
|
|
483
487
|
];
|
|
484
488
|
|
|
@@ -491,9 +495,9 @@ const clarificationOutput = [
|
|
|
491
495
|
})
|
|
492
496
|
];
|
|
493
497
|
|
|
494
|
-
const run = createAgentTestRun({
|
|
495
|
-
inputMessages: vagueInput,
|
|
496
|
-
output: clarificationOutput
|
|
498
|
+
const run = createAgentTestRun({
|
|
499
|
+
inputMessages: vagueInput,
|
|
500
|
+
output: clarificationOutput
|
|
497
501
|
});
|
|
498
502
|
const result = await llmScorer.run(run);
|
|
499
503
|
|
|
@@ -505,58 +509,58 @@ console.log(result.reason); // "The agent appropriately asked for clarification
|
|
|
505
509
|
|
|
506
510
|
Here's an example using both scorers on the same data:
|
|
507
511
|
|
|
508
|
-
```typescript
|
|
509
|
-
import { createToolCallAccuracyScorerCode as createCodeScorer } from
|
|
510
|
-
import { createToolCallAccuracyScorerLLM as createLLMScorer } from
|
|
512
|
+
```typescript title="src/example-comparison.ts" showLineNumbers copy
|
|
513
|
+
import { createToolCallAccuracyScorerCode as createCodeScorer } from "@mastra/evals/scorers/code";
|
|
514
|
+
import { createToolCallAccuracyScorerLLM as createLLMScorer } from "@mastra/evals/scorers/llm";
|
|
511
515
|
|
|
512
516
|
// Setup both scorers
|
|
513
517
|
const codeScorer = createCodeScorer({
|
|
514
|
-
expectedTool:
|
|
515
|
-
strictMode: false
|
|
518
|
+
expectedTool: "weather-tool",
|
|
519
|
+
strictMode: false,
|
|
516
520
|
});
|
|
517
521
|
|
|
518
522
|
const llmScorer = createLLMScorer({
|
|
519
|
-
model:
|
|
523
|
+
model: "openai/gpt-4o-mini",
|
|
520
524
|
availableTools: [
|
|
521
|
-
{ name:
|
|
522
|
-
{ name:
|
|
523
|
-
]
|
|
525
|
+
{ name: "weather-tool", description: "Get weather information" },
|
|
526
|
+
{ name: "search-tool", description: "Search the web" },
|
|
527
|
+
],
|
|
524
528
|
});
|
|
525
529
|
|
|
526
530
|
// Test data
|
|
527
531
|
const run = createAgentTestRun({
|
|
528
532
|
inputMessages: [
|
|
529
|
-
createUIMessage({
|
|
530
|
-
content:
|
|
531
|
-
role:
|
|
532
|
-
id:
|
|
533
|
-
})
|
|
533
|
+
createUIMessage({
|
|
534
|
+
content: "What is the weather?",
|
|
535
|
+
role: "user",
|
|
536
|
+
id: "input-1",
|
|
537
|
+
}),
|
|
534
538
|
],
|
|
535
539
|
output: [
|
|
536
540
|
createUIMessage({
|
|
537
|
-
content:
|
|
538
|
-
role:
|
|
539
|
-
id:
|
|
541
|
+
content: "Let me find that information.",
|
|
542
|
+
role: "assistant",
|
|
543
|
+
id: "output-1",
|
|
540
544
|
toolInvocations: [
|
|
541
545
|
createToolInvocation({
|
|
542
|
-
toolCallId:
|
|
543
|
-
toolName:
|
|
544
|
-
args: { query:
|
|
545
|
-
result: { results: [
|
|
546
|
-
state:
|
|
547
|
-
})
|
|
548
|
-
]
|
|
549
|
-
})
|
|
550
|
-
]
|
|
546
|
+
toolCallId: "call-1",
|
|
547
|
+
toolName: "search-tool",
|
|
548
|
+
args: { query: "weather" },
|
|
549
|
+
result: { results: ["weather data"] },
|
|
550
|
+
state: "result",
|
|
551
|
+
}),
|
|
552
|
+
],
|
|
553
|
+
}),
|
|
554
|
+
],
|
|
551
555
|
});
|
|
552
556
|
|
|
553
557
|
// Run both scorers
|
|
554
558
|
const codeResult = await codeScorer.run(run);
|
|
555
559
|
const llmResult = await llmScorer.run(run);
|
|
556
560
|
|
|
557
|
-
console.log(
|
|
558
|
-
console.log(
|
|
559
|
-
console.log(
|
|
561
|
+
console.log("Code Scorer:", codeResult.score); // 0 - wrong tool
|
|
562
|
+
console.log("LLM Scorer:", llmResult.score); // 0.3 - partially appropriate
|
|
563
|
+
console.log("LLM Reason:", llmResult.reason); // Explains why search-tool is less appropriate
|
|
560
564
|
```
|
|
561
565
|
|
|
562
566
|
## Related
|