@mastra/mcp-docs-server 1.0.0-beta.5 → 1.0.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +67 -67
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +110 -110
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fcodemod.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fconvex.md +60 -0
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +358 -358
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +24 -24
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +79 -79
- package/.docs/organized/changelogs/%40mastra%2Fduckdb.md +42 -0
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Felasticsearch.md +61 -0
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Flance.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +55 -55
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +125 -125
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +36 -36
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +59 -59
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +77 -77
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Frag.md +43 -43
- package/.docs/organized/changelogs/%40mastra%2Freact.md +16 -0
- package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +113 -113
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +57 -57
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +19 -19
- package/.docs/organized/changelogs/create-mastra.md +15 -15
- package/.docs/organized/changelogs/mastra.md +30 -30
- package/.docs/organized/code-examples/agui.md +1 -0
- package/.docs/organized/code-examples/ai-elements.md +1 -1
- package/.docs/organized/code-examples/ai-sdk-useChat.md +1 -1
- package/.docs/organized/code-examples/ai-sdk-v5.md +2 -1
- package/.docs/organized/code-examples/assistant-ui.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +1 -1
- package/.docs/organized/code-examples/crypto-chatbot.md +1 -1
- package/.docs/organized/code-examples/mcp-server-adapters.md +721 -0
- package/.docs/organized/code-examples/server-app-access.md +342 -0
- package/.docs/organized/code-examples/server-express-adapter.md +87 -0
- package/.docs/organized/code-examples/server-hono-adapter.md +85 -0
- package/.docs/raw/agents/agent-approval.mdx +189 -0
- package/.docs/raw/agents/guardrails.mdx +13 -9
- package/.docs/raw/agents/networks.mdx +1 -0
- package/.docs/raw/agents/overview.mdx +8 -152
- package/.docs/raw/agents/processors.mdx +279 -0
- package/.docs/raw/agents/structured-output.mdx +224 -0
- package/.docs/raw/deployment/cloud-providers/index.mdx +19 -26
- package/.docs/raw/deployment/cloud-providers/netlify-deployer.mdx +44 -13
- package/.docs/raw/evals/running-in-ci.mdx +0 -2
- package/.docs/raw/{guides/getting-started → getting-started}/manual-install.mdx +2 -2
- package/.docs/raw/getting-started/start.mdx +1 -1
- package/.docs/raw/guides/build-your-ui/ai-sdk-ui.mdx +8 -0
- package/.docs/raw/guides/getting-started/quickstart.mdx +1 -1
- package/.docs/raw/guides/guide/whatsapp-chat-bot.mdx +421 -0
- package/.docs/raw/guides/index.mdx +3 -35
- package/.docs/raw/guides/migrations/upgrade-to-v1/agent.mdx +11 -0
- package/.docs/raw/guides/migrations/upgrade-to-v1/workflows.mdx +37 -0
- package/.docs/raw/index.mdx +1 -1
- package/.docs/raw/memory/memory-processors.mdx +265 -79
- package/.docs/raw/memory/working-memory.mdx +11 -2
- package/.docs/raw/observability/overview.mdx +0 -1
- package/.docs/raw/observability/tracing/bridges/otel.mdx +200 -0
- package/.docs/raw/observability/tracing/exporters/arize.mdx +36 -0
- package/.docs/raw/observability/tracing/exporters/braintrust.mdx +19 -0
- package/.docs/raw/observability/tracing/exporters/langfuse.mdx +83 -0
- package/.docs/raw/observability/tracing/exporters/langsmith.mdx +12 -0
- package/.docs/raw/observability/tracing/exporters/otel.mdx +34 -22
- package/.docs/raw/observability/tracing/exporters/posthog.mdx +20 -0
- package/.docs/raw/observability/tracing/overview.mdx +76 -6
- package/.docs/raw/observability/tracing/processors/sensitive-data-filter.mdx +0 -1
- package/.docs/raw/rag/retrieval.mdx +23 -6
- package/.docs/raw/rag/vector-databases.mdx +93 -2
- package/.docs/raw/reference/agents/generate.mdx +55 -6
- package/.docs/raw/reference/agents/network.mdx +44 -0
- package/.docs/raw/reference/client-js/memory.mdx +43 -0
- package/.docs/raw/reference/client-js/workflows.mdx +92 -63
- package/.docs/raw/reference/deployer/netlify.mdx +1 -2
- package/.docs/raw/reference/evals/scorer-utils.mdx +362 -0
- package/.docs/raw/reference/index.mdx +1 -0
- package/.docs/raw/reference/observability/tracing/bridges/otel.mdx +177 -0
- package/.docs/raw/reference/observability/tracing/configuration.mdx +0 -4
- package/.docs/raw/reference/observability/tracing/exporters/arize.mdx +29 -0
- package/.docs/raw/reference/observability/tracing/exporters/langfuse.mdx +43 -0
- package/.docs/raw/reference/observability/tracing/exporters/langsmith.mdx +17 -1
- package/.docs/raw/reference/observability/tracing/exporters/otel.mdx +33 -43
- package/.docs/raw/reference/observability/tracing/instances.mdx +0 -4
- package/.docs/raw/reference/observability/tracing/interfaces.mdx +29 -4
- package/.docs/raw/reference/observability/tracing/spans.mdx +0 -4
- package/.docs/raw/reference/processors/language-detector.mdx +9 -2
- package/.docs/raw/reference/processors/message-history-processor.mdx +131 -0
- package/.docs/raw/reference/processors/moderation-processor.mdx +10 -3
- package/.docs/raw/reference/processors/pii-detector.mdx +10 -3
- package/.docs/raw/reference/processors/processor-interface.mdx +502 -0
- package/.docs/raw/reference/processors/prompt-injection-detector.mdx +9 -2
- package/.docs/raw/reference/processors/semantic-recall-processor.mdx +197 -0
- package/.docs/raw/reference/processors/system-prompt-scrubber.mdx +2 -2
- package/.docs/raw/reference/processors/tool-call-filter.mdx +125 -0
- package/.docs/raw/reference/processors/working-memory-processor.mdx +221 -0
- package/.docs/raw/reference/server/create-route.mdx +314 -0
- package/.docs/raw/reference/server/express-adapter.mdx +193 -0
- package/.docs/raw/reference/server/hono-adapter.mdx +174 -0
- package/.docs/raw/reference/server/mastra-server.mdx +316 -0
- package/.docs/raw/reference/server/routes.mdx +250 -0
- package/.docs/raw/reference/storage/cloudflare-d1.mdx +37 -0
- package/.docs/raw/reference/storage/convex.mdx +164 -0
- package/.docs/raw/reference/storage/lance.mdx +33 -0
- package/.docs/raw/reference/storage/libsql.mdx +37 -0
- package/.docs/raw/reference/storage/mongodb.mdx +39 -0
- package/.docs/raw/reference/storage/mssql.mdx +37 -0
- package/.docs/raw/reference/storage/postgresql.mdx +37 -0
- package/.docs/raw/reference/streaming/ChunkType.mdx +1 -1
- package/.docs/raw/reference/streaming/agents/stream.mdx +56 -1
- package/.docs/raw/reference/streaming/workflows/observeStream.mdx +7 -9
- package/.docs/raw/reference/streaming/workflows/{resumeStreamVNext.mdx → resumeStream.mdx} +51 -11
- package/.docs/raw/reference/streaming/workflows/stream.mdx +83 -24
- package/.docs/raw/reference/streaming/workflows/timeTravelStream.mdx +170 -0
- package/.docs/raw/reference/tools/mcp-client.mdx +128 -18
- package/.docs/raw/reference/vectors/convex.mdx +429 -0
- package/.docs/raw/reference/vectors/duckdb.mdx +462 -0
- package/.docs/raw/reference/vectors/elasticsearch.mdx +310 -0
- package/.docs/raw/reference/voice/google.mdx +159 -20
- package/.docs/raw/reference/workflows/run-methods/restart.mdx +142 -0
- package/.docs/raw/reference/workflows/run-methods/resume.mdx +44 -0
- package/.docs/raw/reference/workflows/run-methods/start.mdx +44 -0
- package/.docs/raw/reference/workflows/run-methods/timeTravel.mdx +310 -0
- package/.docs/raw/reference/workflows/run.mdx +27 -5
- package/.docs/raw/reference/workflows/step.mdx +13 -0
- package/.docs/raw/reference/workflows/workflow.mdx +19 -0
- package/.docs/raw/server-db/custom-adapters.mdx +380 -0
- package/.docs/raw/server-db/mastra-server.mdx +16 -8
- package/.docs/raw/server-db/request-context.mdx +0 -1
- package/.docs/raw/server-db/server-adapters.mdx +286 -0
- package/.docs/raw/server-db/storage.mdx +11 -0
- package/.docs/raw/streaming/overview.mdx +6 -6
- package/.docs/raw/streaming/tool-streaming.mdx +2 -2
- package/.docs/raw/streaming/workflow-streaming.mdx +5 -11
- package/.docs/raw/workflows/error-handling.mdx +1 -0
- package/.docs/raw/workflows/human-in-the-loop.mdx +4 -4
- package/.docs/raw/workflows/overview.mdx +56 -44
- package/.docs/raw/workflows/snapshots.mdx +1 -0
- package/.docs/raw/workflows/suspend-and-resume.mdx +85 -16
- package/.docs/raw/workflows/time-travel.mdx +313 -0
- package/.docs/raw/workflows/workflow-state.mdx +191 -0
- package/CHANGELOG.md +16 -0
- package/package.json +4 -4
- package/.docs/raw/agents/human-in-the-loop-with-tools.mdx +0 -91
- package/.docs/raw/reference/streaming/workflows/observeStreamVNext.mdx +0 -47
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +0 -153
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Scorer Utils | Evals"
|
|
3
|
+
description: Utility functions for extracting data from scorer run inputs and outputs, including text content, reasoning, system messages, and tool calls.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Scorer Utils
|
|
7
|
+
|
|
8
|
+
Mastra provides utility functions to help extract and process data from scorer run inputs and outputs. These utilities are particularly useful in the `preprocess` step of custom scorers.
|
|
9
|
+
|
|
10
|
+
## Import
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import {
|
|
14
|
+
getAssistantMessageFromRunOutput,
|
|
15
|
+
getReasoningFromRunOutput,
|
|
16
|
+
getUserMessageFromRunInput,
|
|
17
|
+
getSystemMessagesFromRunInput,
|
|
18
|
+
getCombinedSystemPrompt,
|
|
19
|
+
extractToolCalls,
|
|
20
|
+
extractInputMessages,
|
|
21
|
+
extractAgentResponseMessages,
|
|
22
|
+
} from "@mastra/evals/scorers/utils";
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Message Extraction
|
|
26
|
+
|
|
27
|
+
### getAssistantMessageFromRunOutput
|
|
28
|
+
|
|
29
|
+
Extracts the text content from the first assistant message in the run output.
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
const scorer = createScorer({
|
|
33
|
+
id: "my-scorer",
|
|
34
|
+
description: "My scorer",
|
|
35
|
+
type: "agent",
|
|
36
|
+
})
|
|
37
|
+
.preprocess(({ run }) => {
|
|
38
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
39
|
+
return { response };
|
|
40
|
+
})
|
|
41
|
+
.generateScore(({ results }) => {
|
|
42
|
+
return results.preprocessStepResult?.response ? 1 : 0;
|
|
43
|
+
});
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
<PropertiesTable
|
|
47
|
+
content={[
|
|
48
|
+
{
|
|
49
|
+
name: "output",
|
|
50
|
+
type: "ScorerRunOutputForAgent",
|
|
51
|
+
isOptional: true,
|
|
52
|
+
description: "The scorer run output (array of MastraDBMessage)",
|
|
53
|
+
},
|
|
54
|
+
]}
|
|
55
|
+
/>
|
|
56
|
+
|
|
57
|
+
**Returns:** `string | undefined` - The assistant message text, or undefined if no assistant message is found.
|
|
58
|
+
|
|
59
|
+
### getUserMessageFromRunInput
|
|
60
|
+
|
|
61
|
+
Extracts the text content from the first user message in the run input.
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
.preprocess(({ run }) => {
|
|
65
|
+
const userMessage = getUserMessageFromRunInput(run.input);
|
|
66
|
+
return { userMessage };
|
|
67
|
+
})
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
<PropertiesTable
|
|
71
|
+
content={[
|
|
72
|
+
{
|
|
73
|
+
name: "input",
|
|
74
|
+
type: "ScorerRunInputForAgent",
|
|
75
|
+
isOptional: true,
|
|
76
|
+
description: "The scorer run input containing input messages",
|
|
77
|
+
},
|
|
78
|
+
]}
|
|
79
|
+
/>
|
|
80
|
+
|
|
81
|
+
**Returns:** `string | undefined` - The user message text, or undefined if no user message is found.
|
|
82
|
+
|
|
83
|
+
### extractInputMessages
|
|
84
|
+
|
|
85
|
+
Extracts text content from all input messages as an array.
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
.preprocess(({ run }) => {
|
|
89
|
+
const allUserMessages = extractInputMessages(run.input);
|
|
90
|
+
return { conversationHistory: allUserMessages.join("\n") };
|
|
91
|
+
})
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Returns:** `string[]` - Array of text strings from each input message.
|
|
95
|
+
|
|
96
|
+
### extractAgentResponseMessages
|
|
97
|
+
|
|
98
|
+
Extracts text content from all assistant response messages as an array.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
.preprocess(({ run }) => {
|
|
102
|
+
const allResponses = extractAgentResponseMessages(run.output);
|
|
103
|
+
return { allResponses };
|
|
104
|
+
})
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Returns:** `string[]` - Array of text strings from each assistant message.
|
|
108
|
+
|
|
109
|
+
## Reasoning Extraction
|
|
110
|
+
|
|
111
|
+
### getReasoningFromRunOutput
|
|
112
|
+
|
|
113
|
+
Extracts reasoning text from the run output. This is particularly useful when evaluating responses from models such as `deepseek-reasoner` that emit their chain of thought alongside the final answer.
|
|
114
|
+
|
|
115
|
+
Reasoning can be stored in two places:
|
|
116
|
+
1. `content.reasoning` - a string field on the message content
|
|
117
|
+
2. `content.parts` - entries with `type: 'reasoning'` that contain `details`
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
import {
|
|
121
|
+
getReasoningFromRunOutput,
|
|
122
|
+
getAssistantMessageFromRunOutput
|
|
123
|
+
} from "@mastra/evals/scorers/utils";
|
|
124
|
+
|
|
125
|
+
const reasoningQualityScorer = createScorer({
|
|
126
|
+
id: "reasoning-quality",
|
|
127
|
+
name: "Reasoning Quality",
|
|
128
|
+
description: "Evaluates the quality of model reasoning",
|
|
129
|
+
type: "agent",
|
|
130
|
+
})
|
|
131
|
+
.preprocess(({ run }) => {
|
|
132
|
+
const reasoning = getReasoningFromRunOutput(run.output);
|
|
133
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
134
|
+
return { reasoning, response };
|
|
135
|
+
})
|
|
136
|
+
.analyze(({ results }) => {
|
|
137
|
+
const { reasoning } = results.preprocessStepResult || {};
|
|
138
|
+
return {
|
|
139
|
+
hasReasoning: !!reasoning,
|
|
140
|
+
reasoningLength: reasoning?.length || 0,
|
|
141
|
+
hasStepByStep: reasoning?.includes("step") || false,
|
|
142
|
+
};
|
|
143
|
+
})
|
|
144
|
+
.generateScore(({ results }) => {
|
|
145
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {};
|
|
146
|
+
if (!hasReasoning) return 0;
|
|
147
|
+
// Score grows linearly with reasoning length and is capped at 1 once it reaches 500 characters
|
|
148
|
+
return Math.min(reasoningLength / 500, 1);
|
|
149
|
+
})
|
|
150
|
+
.generateReason(({ results, score }) => {
|
|
151
|
+
const { hasReasoning, reasoningLength } = results.analyzeStepResult || {};
|
|
152
|
+
if (!hasReasoning) {
|
|
153
|
+
return "No reasoning was provided by the model.";
|
|
154
|
+
}
|
|
155
|
+
return `Model provided ${reasoningLength} characters of reasoning. Score: ${score}`;
|
|
156
|
+
});
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
<PropertiesTable
|
|
160
|
+
content={[
|
|
161
|
+
{
|
|
162
|
+
name: "output",
|
|
163
|
+
type: "ScorerRunOutputForAgent",
|
|
164
|
+
isOptional: true,
|
|
165
|
+
description: "The scorer run output (array of MastraDBMessage)",
|
|
166
|
+
},
|
|
167
|
+
]}
|
|
168
|
+
/>
|
|
169
|
+
|
|
170
|
+
**Returns:** `string | undefined` - The reasoning text, or undefined if no reasoning is present.
|
|
171
|
+
|
|
172
|
+
## System Message Extraction
|
|
173
|
+
|
|
174
|
+
### getSystemMessagesFromRunInput
|
|
175
|
+
|
|
176
|
+
Extracts all system messages from the run input, including both standard system messages and tagged system messages (specialized prompts like memory instructions).
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
.preprocess(({ run }) => {
|
|
180
|
+
const systemMessages = getSystemMessagesFromRunInput(run.input);
|
|
181
|
+
return {
|
|
182
|
+
systemPromptCount: systemMessages.length,
|
|
183
|
+
systemPrompts: systemMessages
|
|
184
|
+
};
|
|
185
|
+
})
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Returns:** `string[]` - Array of system message strings.
|
|
189
|
+
|
|
190
|
+
### getCombinedSystemPrompt
|
|
191
|
+
|
|
192
|
+
Combines all system messages into a single prompt string, joined with double newlines.
|
|
193
|
+
|
|
194
|
+
```typescript
|
|
195
|
+
.preprocess(({ run }) => {
|
|
196
|
+
const fullSystemPrompt = getCombinedSystemPrompt(run.input);
|
|
197
|
+
return { fullSystemPrompt };
|
|
198
|
+
})
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Returns:** `string` - Combined system prompt string.
|
|
202
|
+
|
|
203
|
+
## Tool Call Extraction
|
|
204
|
+
|
|
205
|
+
### extractToolCalls
|
|
206
|
+
|
|
207
|
+
Extracts information about all tool calls from the run output, including tool names, call IDs, and their positions in the message array.
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
const toolUsageScorer = createScorer({
|
|
211
|
+
id: "tool-usage",
|
|
212
|
+
description: "Evaluates tool usage patterns",
|
|
213
|
+
type: "agent",
|
|
214
|
+
})
|
|
215
|
+
.preprocess(({ run }) => {
|
|
216
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output);
|
|
217
|
+
return {
|
|
218
|
+
toolsUsed: tools,
|
|
219
|
+
toolCount: tools.length,
|
|
220
|
+
toolDetails: toolCallInfos,
|
|
221
|
+
};
|
|
222
|
+
})
|
|
223
|
+
.generateScore(({ results }) => {
|
|
224
|
+
const { toolCount } = results.preprocessStepResult || {};
|
|
225
|
+
// Score 1 if at least one tool was called, otherwise 0
|
|
226
|
+
return toolCount > 0 ? 1 : 0;
|
|
227
|
+
});
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
**Returns:**
|
|
231
|
+
|
|
232
|
+
```typescript
|
|
233
|
+
{
|
|
234
|
+
tools: string[]; // Array of tool names
|
|
235
|
+
toolCallInfos: ToolCallInfo[]; // Detailed tool call information
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Where `ToolCallInfo` is:
|
|
240
|
+
|
|
241
|
+
```typescript
|
|
242
|
+
type ToolCallInfo = {
|
|
243
|
+
toolName: string; // Name of the tool
|
|
244
|
+
toolCallId: string; // Unique call identifier
|
|
245
|
+
messageIndex: number; // Index in the output array
|
|
246
|
+
invocationIndex: number; // Index within message's tool invocations
|
|
247
|
+
};
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## Test Utilities
|
|
251
|
+
|
|
252
|
+
These utilities help create test data for scorer development.
|
|
253
|
+
|
|
254
|
+
### createTestMessage
|
|
255
|
+
|
|
256
|
+
Creates a `MastraDBMessage` object for testing purposes.
|
|
257
|
+
|
|
258
|
+
```typescript
|
|
259
|
+
import { createTestMessage } from "@mastra/evals/scorers/utils";
|
|
260
|
+
|
|
261
|
+
const userMessage = createTestMessage({
|
|
262
|
+
content: "What is the weather?",
|
|
263
|
+
role: "user",
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
const assistantMessage = createTestMessage({
|
|
267
|
+
content: "The weather is sunny.",
|
|
268
|
+
role: "assistant",
|
|
269
|
+
toolInvocations: [
|
|
270
|
+
{
|
|
271
|
+
toolCallId: "call-1",
|
|
272
|
+
toolName: "weatherTool",
|
|
273
|
+
args: { location: "London" },
|
|
274
|
+
result: { temp: 20 },
|
|
275
|
+
state: "result",
|
|
276
|
+
},
|
|
277
|
+
],
|
|
278
|
+
});
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### createAgentTestRun
|
|
282
|
+
|
|
283
|
+
Creates a complete test run object for testing scorers.
|
|
284
|
+
|
|
285
|
+
```typescript
|
|
286
|
+
import { createAgentTestRun, createTestMessage } from "@mastra/evals/scorers/utils";
|
|
287
|
+
|
|
288
|
+
const testRun = createAgentTestRun({
|
|
289
|
+
inputMessages: [
|
|
290
|
+
createTestMessage({ content: "Hello", role: "user" }),
|
|
291
|
+
],
|
|
292
|
+
output: [
|
|
293
|
+
createTestMessage({ content: "Hi there!", role: "assistant" }),
|
|
294
|
+
],
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
// Run your scorer with the test data
|
|
298
|
+
const result = await myScorer.run({
|
|
299
|
+
input: testRun.input,
|
|
300
|
+
output: testRun.output,
|
|
301
|
+
});
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Complete Example
|
|
305
|
+
|
|
306
|
+
Here's a complete example showing how to use multiple utilities together:
|
|
307
|
+
|
|
308
|
+
```typescript
|
|
309
|
+
import { createScorer } from "@mastra/core/evals";
|
|
310
|
+
import {
|
|
311
|
+
getAssistantMessageFromRunOutput,
|
|
312
|
+
getReasoningFromRunOutput,
|
|
313
|
+
getUserMessageFromRunInput,
|
|
314
|
+
getCombinedSystemPrompt,
|
|
315
|
+
extractToolCalls,
|
|
316
|
+
} from "@mastra/evals/scorers/utils";
|
|
317
|
+
|
|
318
|
+
const comprehensiveScorer = createScorer({
|
|
319
|
+
id: "comprehensive-analysis",
|
|
320
|
+
name: "Comprehensive Analysis",
|
|
321
|
+
description: "Analyzes all aspects of an agent response",
|
|
322
|
+
type: "agent",
|
|
323
|
+
})
|
|
324
|
+
.preprocess(({ run }) => {
|
|
325
|
+
// Extract all relevant data
|
|
326
|
+
const userMessage = getUserMessageFromRunInput(run.input);
|
|
327
|
+
const response = getAssistantMessageFromRunOutput(run.output);
|
|
328
|
+
const reasoning = getReasoningFromRunOutput(run.output);
|
|
329
|
+
const systemPrompt = getCombinedSystemPrompt(run.input);
|
|
330
|
+
const { tools, toolCallInfos } = extractToolCalls(run.output);
|
|
331
|
+
|
|
332
|
+
return {
|
|
333
|
+
userMessage,
|
|
334
|
+
response,
|
|
335
|
+
reasoning,
|
|
336
|
+
systemPrompt,
|
|
337
|
+
toolsUsed: tools,
|
|
338
|
+
toolCount: tools.length,
|
|
339
|
+
};
|
|
340
|
+
})
|
|
341
|
+
.generateScore(({ results }) => {
|
|
342
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {};
|
|
343
|
+
|
|
344
|
+
let score = 0;
|
|
345
|
+
if (response && response.length > 0) score += 0.4;
|
|
346
|
+
if (reasoning) score += 0.3;
|
|
347
|
+
if (toolCount > 0) score += 0.3;
|
|
348
|
+
|
|
349
|
+
return score;
|
|
350
|
+
})
|
|
351
|
+
.generateReason(({ results, score }) => {
|
|
352
|
+
const { response, reasoning, toolCount } = results.preprocessStepResult || {};
|
|
353
|
+
|
|
354
|
+
const parts = [];
|
|
355
|
+
if (response) parts.push("provided a response");
|
|
356
|
+
if (reasoning) parts.push("included reasoning");
|
|
357
|
+
if (toolCount > 0) parts.push(`used ${toolCount} tool(s)`);
|
|
358
|
+
|
|
359
|
+
return `Score: ${score}. The agent ${parts.join(", ")}.`;
|
|
360
|
+
});
|
|
361
|
+
```
|
|
362
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: OtelBridge | Observability"
|
|
3
|
+
description: API reference for OtelBridge, which provides bidirectional integration between Mastra tracing and OpenTelemetry.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import PropertiesTable from "@site/src/components/PropertiesTable";
|
|
7
|
+
|
|
8
|
+
# OtelBridge
|
|
9
|
+
|
|
10
|
+
:::warning
|
|
11
|
+
|
|
12
|
+
The OpenTelemetry Bridge is currently **experimental**. APIs and configuration options may change in future releases.
|
|
13
|
+
|
|
14
|
+
:::
|
|
15
|
+
|
|
16
|
+
Enables bidirectional integration between Mastra tracing and OpenTelemetry infrastructure. Creates native OTEL spans for Mastra operations and inherits context from active OTEL spans.
|
|
17
|
+
|
|
18
|
+
## Constructor
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
new OtelBridge()
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Methods
|
|
25
|
+
|
|
26
|
+
### executeInContext
|
|
27
|
+
|
|
28
|
+
```typescript
|
|
29
|
+
executeInContext<T>(spanId: string, fn: () => Promise<T>): Promise<T>
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Executes an async function within the OTEL context of a Mastra span. OTEL-instrumented code running inside the function will have correct parent relationships.
|
|
33
|
+
|
|
34
|
+
<PropertiesTable
|
|
35
|
+
props={[
|
|
36
|
+
{
|
|
37
|
+
name: "spanId",
|
|
38
|
+
type: "string",
|
|
39
|
+
description: "The ID of the Mastra span to use as context",
|
|
40
|
+
required: true,
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
name: "fn",
|
|
44
|
+
type: "() => Promise<T>",
|
|
45
|
+
description: "The async function to execute within the span context",
|
|
46
|
+
required: true,
|
|
47
|
+
},
|
|
48
|
+
]}
|
|
49
|
+
/>
|
|
50
|
+
|
|
51
|
+
**Returns:** `Promise<T>` - The result of the function execution.
|
|
52
|
+
|
|
53
|
+
### executeInContextSync
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
executeInContextSync<T>(spanId: string, fn: () => T): T
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Executes a synchronous function within the OTEL context of a Mastra span.
|
|
60
|
+
|
|
61
|
+
<PropertiesTable
|
|
62
|
+
props={[
|
|
63
|
+
{
|
|
64
|
+
name: "spanId",
|
|
65
|
+
type: "string",
|
|
66
|
+
description: "The ID of the Mastra span to use as context",
|
|
67
|
+
required: true,
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
name: "fn",
|
|
71
|
+
type: "() => T",
|
|
72
|
+
description: "The synchronous function to execute within the span context",
|
|
73
|
+
required: true,
|
|
74
|
+
},
|
|
75
|
+
]}
|
|
76
|
+
/>
|
|
77
|
+
|
|
78
|
+
**Returns:** `T` - The result of the function execution.
|
|
79
|
+
|
|
80
|
+
### shutdown
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
async shutdown(): Promise<void>
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Shuts down the bridge and cleans up resources. Ends any spans that were not properly closed.
|
|
87
|
+
|
|
88
|
+
## Usage Examples
|
|
89
|
+
|
|
90
|
+
### Basic Usage
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
import { Mastra } from "@mastra/core";
|
|
94
|
+
import { Observability } from "@mastra/observability";
|
|
95
|
+
import { OtelBridge } from "@mastra/otel-bridge";
|
|
96
|
+
|
|
97
|
+
const mastra = new Mastra({
|
|
98
|
+
observability: new Observability({
|
|
99
|
+
configs: {
|
|
100
|
+
default: {
|
|
101
|
+
serviceName: "my-service",
|
|
102
|
+
bridge: new OtelBridge(),
|
|
103
|
+
},
|
|
104
|
+
},
|
|
105
|
+
}),
|
|
106
|
+
agents: { myAgent },
|
|
107
|
+
});
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Combined with Exporters
|
|
111
|
+
|
|
112
|
+
The bridge can be used alongside exporters. The bridge handles OTEL context, while exporters send data to additional destinations:
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
import { Mastra } from "@mastra/core";
|
|
116
|
+
import { Observability, DefaultExporter } from "@mastra/observability";
|
|
117
|
+
import { OtelBridge } from "@mastra/otel-bridge";
|
|
118
|
+
import { LangfuseExporter } from "@mastra/langfuse";
|
|
119
|
+
|
|
120
|
+
const mastra = new Mastra({
|
|
121
|
+
observability: new Observability({
|
|
122
|
+
configs: {
|
|
123
|
+
default: {
|
|
124
|
+
serviceName: "my-service",
|
|
125
|
+
bridge: new OtelBridge(), // Handles OTEL context
|
|
126
|
+
exporters: [
|
|
127
|
+
new DefaultExporter(), // Studio access
|
|
128
|
+
new LangfuseExporter({
|
|
129
|
+
// Additional destination
|
|
130
|
+
publicKey: process.env.LANGFUSE_PUBLIC_KEY,
|
|
131
|
+
secretKey: process.env.LANGFUSE_SECRET_KEY,
|
|
132
|
+
}),
|
|
133
|
+
],
|
|
134
|
+
},
|
|
135
|
+
},
|
|
136
|
+
}),
|
|
137
|
+
});
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## OpenTelemetry Setup Requirements
|
|
141
|
+
|
|
142
|
+
The OtelBridge requires an active OpenTelemetry SDK to function. The bridge reads from OTEL's ambient context.
|
|
143
|
+
|
|
144
|
+
See the [OtelBridge Guide](/docs/v1/observability/tracing/bridges/otel#configuration) for complete setup instructions, including how to configure OTEL instrumentation and run your application.
|
|
145
|
+
|
|
146
|
+
## Tags Support
|
|
147
|
+
|
|
148
|
+
The OtelBridge supports trace tagging for categorization and filtering. Tags are only applied to root spans and are included as the `mastra.tags` attribute on native OTEL spans.
|
|
149
|
+
|
|
150
|
+
### Usage
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
const result = await agent.generate({
|
|
154
|
+
messages: [{ role: "user", content: "Hello" }],
|
|
155
|
+
tracingOptions: {
|
|
156
|
+
tags: ["production", "experiment-v2", "user-request"],
|
|
157
|
+
},
|
|
158
|
+
});
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### How Tags Are Stored
|
|
162
|
+
|
|
163
|
+
Tags are stored as a JSON-stringified array in the `mastra.tags` span attribute:
|
|
164
|
+
|
|
165
|
+
```json
|
|
166
|
+
{
|
|
167
|
+
"mastra.tags": "[\"production\",\"experiment-v2\",\"user-request\"]"
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
This format ensures compatibility with all OTEL-compatible backends and collectors.
|
|
172
|
+
|
|
173
|
+
## Related
|
|
174
|
+
|
|
175
|
+
- [OtelBridge Guide](/docs/v1/observability/tracing/bridges/otel) - Setup guide with examples
|
|
176
|
+
- [Tracing Overview](/docs/v1/observability/tracing/overview) - General tracing concepts
|
|
177
|
+
- [OtelExporter Reference](/reference/v1/observability/tracing/exporters/otel) - OTEL exporter for sending traces
|
|
@@ -232,10 +232,6 @@ Shuts down all observability instances and clears the registry.
|
|
|
232
232
|
- [Interfaces](/reference/v1/observability/tracing/interfaces) - Type definitions
|
|
233
233
|
- [Spans Reference](/reference/v1/observability/tracing/spans) - Span lifecycle
|
|
234
234
|
|
|
235
|
-
### Examples
|
|
236
|
-
|
|
237
|
-
- [Basic Tracing](/examples/v1/observability/basic-ai-tracing) - Getting started
|
|
238
|
-
|
|
239
235
|
### Exporters
|
|
240
236
|
|
|
241
237
|
- [DefaultExporter](/reference/v1/observability/tracing/exporters/default-exporter) - Storage configuration
|
|
@@ -38,6 +38,10 @@ Inherits from `OtelExporterConfig` (excluding `provider`), which includes:
|
|
|
38
38
|
- `logLevel?: LogLevel | 'debug' | 'info' | 'warn' | 'error'` - Log level (default: WARN)
|
|
39
39
|
- `resourceAttributes?: Record<string, any>` - Custom resource attributes
|
|
40
40
|
|
|
41
|
+
### Metadata passthrough
|
|
42
|
+
|
|
43
|
+
Non-reserved span attributes are serialized into the OpenInference `metadata` payload. Add them via `tracingOptions.metadata` (e.g., `companyId`, `tier`). Reserved fields such as `input`, `output`, `sessionId`, thread/user IDs, and OpenInference IDs are excluded automatically.
|
|
44
|
+
|
|
41
45
|
<PropertiesTable
|
|
42
46
|
props={[
|
|
43
47
|
{
|
|
@@ -157,6 +161,31 @@ const exporter = new ArizeExporter({
|
|
|
157
161
|
|
|
158
162
|
The ArizeExporter implements [OpenInference Semantic Conventions](https://github.com/Arize-ai/openinference/tree/main/spec) for generative AI applications, providing standardized trace structure across different observability platforms.
|
|
159
163
|
|
|
164
|
+
## Tags Support
|
|
165
|
+
|
|
166
|
+
The ArizeExporter supports trace tagging for categorization and filtering. Tags are only applied to root spans and are mapped to the native OpenInference `tag.tags` semantic convention.
|
|
167
|
+
|
|
168
|
+
### Usage
|
|
169
|
+
|
|
170
|
+
```typescript
|
|
171
|
+
const result = await agent.generate({
|
|
172
|
+
messages: [{ role: "user", content: "Hello" }],
|
|
173
|
+
tracingOptions: {
|
|
174
|
+
tags: ["production", "experiment-v2", "user-request"],
|
|
175
|
+
},
|
|
176
|
+
});
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### How Tags Are Stored
|
|
180
|
+
|
|
181
|
+
Tags are stored using the OpenInference `tag.tags` attribute:
|
|
182
|
+
|
|
183
|
+
```json
|
|
184
|
+
{
|
|
185
|
+
"tag.tags": ["production", "experiment-v2", "user-request"]
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
160
189
|
## Related
|
|
161
190
|
|
|
162
191
|
- [ArizeExporter Documentation](/docs/v1/observability/tracing/exporters/arize)
|
|
@@ -117,3 +117,46 @@ const exporter = new LangfuseExporter({
|
|
|
117
117
|
- `MODEL_GENERATION` spans → Langfuse generations
|
|
118
118
|
- All other spans → Langfuse spans
|
|
119
119
|
- Event spans → Langfuse events
|
|
120
|
+
|
|
121
|
+
## Prompt Linking
|
|
122
|
+
|
|
123
|
+
Link LLM generations to [Langfuse Prompt Management](https://langfuse.com/docs/prompt-management) using the `withLangfusePrompt` helper:
|
|
124
|
+
|
|
125
|
+
```typescript
|
|
126
|
+
import { buildTracingOptions } from "@mastra/observability";
|
|
127
|
+
import { withLangfusePrompt } from "@mastra/langfuse";
|
|
128
|
+
import { Langfuse } from "langfuse";
|
|
129
|
+
|
|
130
|
+
const langfuse = new Langfuse({
|
|
131
|
+
publicKey: process.env.LANGFUSE_PUBLIC_KEY!,
|
|
132
|
+
secretKey: process.env.LANGFUSE_SECRET_KEY!,
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
const prompt = await langfuse.getPrompt("customer-support");
|
|
136
|
+
|
|
137
|
+
const agent = new Agent({
|
|
138
|
+
name: "support-agent",
|
|
139
|
+
instructions: prompt.prompt,
|
|
140
|
+
model: openai("gpt-4o"),
|
|
141
|
+
defaultGenerateOptions: {
|
|
142
|
+
tracingOptions: buildTracingOptions(withLangfusePrompt(prompt)),
|
|
143
|
+
},
|
|
144
|
+
});
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Helper Functions
|
|
148
|
+
|
|
149
|
+
#### `withLangfusePrompt(prompt)`
|
|
150
|
+
|
|
151
|
+
Adds Langfuse prompt metadata to tracing options.
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
// With Langfuse SDK prompt object
|
|
155
|
+
withLangfusePrompt(prompt)
|
|
156
|
+
|
|
157
|
+
// With manual fields
|
|
158
|
+
withLangfusePrompt({ name: "my-prompt", version: 1 })
|
|
159
|
+
withLangfusePrompt({ id: "prompt-uuid" })
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
When `metadata.langfuse.prompt` is set on a `MODEL_GENERATION` span (with either `id` alone, or `name` + `version`), the exporter automatically links the generation to the prompt in Langfuse.
|
|
@@ -20,6 +20,7 @@ new LangSmithExporter(config: LangSmithExporterConfig)
|
|
|
20
20
|
```typescript
|
|
21
21
|
interface LangSmithExporterConfig extends ClientConfig, BaseExporterConfig {
|
|
22
22
|
client?: Client;
|
|
23
|
+
projectName?: string;
|
|
23
24
|
}
|
|
24
25
|
```
|
|
25
26
|
|
|
@@ -32,7 +33,13 @@ Extends both `ClientConfig` (from LangSmith SDK) and `BaseExporterConfig`:
|
|
|
32
33
|
{
|
|
33
34
|
name: "apiKey",
|
|
34
35
|
type: "string",
|
|
35
|
-
description: "LangSmith API key",
|
|
36
|
+
description: "LangSmith API key. Defaults to LANGSMITH_API_KEY env var.",
|
|
37
|
+
required: false,
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
name: "projectName",
|
|
41
|
+
type: "string",
|
|
42
|
+
description: "The LangSmith project to send traces to. Overrides LANGCHAIN_PROJECT env var. Defaults to 'default'.",
|
|
36
43
|
required: false,
|
|
37
44
|
},
|
|
38
45
|
{
|
|
@@ -99,11 +106,20 @@ import { LangSmithExporter } from "@mastra/langsmith";
|
|
|
99
106
|
|
|
100
107
|
const exporter = new LangSmithExporter({
|
|
101
108
|
apiKey: process.env.LANGSMITH_API_KEY,
|
|
109
|
+
projectName: "my-project", // Optional: specify which project to send traces to
|
|
102
110
|
apiUrl: "https://api.smith.langchain.com",
|
|
103
111
|
logLevel: "info",
|
|
104
112
|
});
|
|
105
113
|
```
|
|
106
114
|
|
|
115
|
+
## Environment Variables
|
|
116
|
+
|
|
117
|
+
| Variable | Description |
|
|
118
|
+
|----------|-------------|
|
|
119
|
+
| `LANGSMITH_API_KEY` | Your LangSmith API key |
|
|
120
|
+
| `LANGCHAIN_PROJECT` | Default project name for traces (used if `projectName` is not specified) |
|
|
121
|
+
| `LANGSMITH_BASE_URL` | API URL for self-hosted instances |
|
|
122
|
+
|
|
107
123
|
## Span Type Mapping
|
|
108
124
|
|
|
109
125
|
| Span Type | LangSmith Type |
|