@mastra/mcp-docs-server 0.13.17-alpha.3 → 0.13.17-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +15 -0
  2. package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +10 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +17 -17
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +35 -35
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +8 -0
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +12 -12
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +20 -20
  8. package/.docs/organized/changelogs/%40mastra%2Fevals.md +10 -10
  9. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +10 -10
  10. package/.docs/organized/changelogs/%40mastra%2Fgithub.md +11 -11
  11. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
  12. package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +11 -11
  13. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +10 -10
  14. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +14 -14
  15. package/.docs/organized/changelogs/%40mastra%2Fpg.md +10 -10
  16. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +21 -21
  17. package/.docs/organized/changelogs/%40mastra%2Fragie.md +10 -10
  18. package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +7 -0
  19. package/.docs/organized/changelogs/%40mastra%2Fserver.md +16 -16
  20. package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +10 -0
  21. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +11 -11
  22. package/.docs/organized/changelogs/create-mastra.md +7 -7
  23. package/.docs/organized/changelogs/mastra.md +19 -19
  24. package/.docs/organized/code-examples/a2a.md +1 -1
  25. package/.docs/organized/code-examples/agent-network.md +1 -1
  26. package/.docs/organized/code-examples/agent.md +22 -1
  27. package/.docs/organized/code-examples/agui.md +1 -1
  28. package/.docs/organized/code-examples/ai-sdk-useChat.md +1 -1
  29. package/.docs/organized/code-examples/ai-sdk-v5.md +2 -2
  30. package/.docs/organized/code-examples/assistant-ui.md +3 -3
  31. package/.docs/organized/code-examples/bird-checker-with-express.md +1 -1
  32. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +1 -1
  33. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +1 -1
  34. package/.docs/organized/code-examples/client-side-tools.md +1 -1
  35. package/.docs/organized/code-examples/crypto-chatbot.md +1 -1
  36. package/.docs/organized/code-examples/experimental-auth-weather-agent.md +1 -1
  37. package/.docs/organized/code-examples/fireworks-r1.md +1 -1
  38. package/.docs/organized/code-examples/mcp-configuration.md +2 -2
  39. package/.docs/organized/code-examples/mcp-registry-registry.md +1 -1
  40. package/.docs/organized/code-examples/memory-with-mem0.md +1 -1
  41. package/.docs/organized/code-examples/memory-with-processors.md +1 -1
  42. package/.docs/organized/code-examples/openapi-spec-writer.md +2 -2
  43. package/.docs/organized/code-examples/quick-start.md +1 -1
  44. package/.docs/organized/code-examples/stock-price-tool.md +1 -1
  45. package/.docs/organized/code-examples/weather-agent.md +1 -1
  46. package/.docs/organized/code-examples/workflow-ai-recruiter.md +1 -1
  47. package/.docs/organized/code-examples/workflow-with-inline-steps.md +1 -1
  48. package/.docs/organized/code-examples/workflow-with-memory.md +1 -1
  49. package/.docs/organized/code-examples/workflow-with-separate-steps.md +1 -1
  50. package/.docs/organized/code-examples/workflow-with-suspend-resume.md +1 -1
  51. package/.docs/raw/agents/overview.mdx +35 -4
  52. package/.docs/raw/deployment/monorepo.mdx +1 -1
  53. package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +44 -14
  54. package/.docs/raw/getting-started/installation.mdx +52 -4
  55. package/.docs/raw/getting-started/templates.mdx +2 -22
  56. package/.docs/raw/reference/agents/generate.mdx +2 -2
  57. package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +2 -1
  58. package/.docs/raw/reference/agents/getDefaultVNextStreamOptions.mdx +1 -1
  59. package/.docs/raw/reference/agents/stream.mdx +2 -2
  60. package/.docs/raw/reference/cli/build.mdx +0 -6
  61. package/.docs/raw/reference/cli/start.mdx +8 -1
  62. package/.docs/raw/reference/scorers/noise-sensitivity.mdx +237 -0
  63. package/.docs/raw/reference/scorers/prompt-alignment.mdx +369 -0
  64. package/.docs/raw/scorers/off-the-shelf-scorers.mdx +2 -2
  65. package/.docs/raw/streaming/overview.mdx +2 -2
  66. package/.docs/raw/streaming/tool-streaming.mdx +8 -2
  67. package/.docs/raw/streaming/workflow-streaming.mdx +8 -2
  68. package/.docs/raw/tools-mcp/overview.mdx +44 -0
  69. package/.docs/raw/workflows/overview.mdx +19 -17
  70. package/dist/stdio.js +5 -1
  71. package/dist/tools/docs.d.ts.map +1 -1
  72. package/package.json +6 -6
package/.docs/raw/reference/scorers/prompt-alignment.mdx ADDED
@@ -0,0 +1,369 @@
+ ---
+ title: "Reference: Prompt Alignment Scorer | Scorers | Mastra Docs"
+ description: Documentation for the Prompt Alignment Scorer in Mastra. Evaluates how well agent responses align with user prompt intent, requirements, completeness, and appropriateness using multi-dimensional analysis.
+ ---
+
+ import { PropertiesTable } from "@/components/properties-table";
+
+ # Prompt Alignment Scorer
+
+ The `createPromptAlignmentScorerLLM()` function creates a scorer that evaluates how well agent responses align with user prompts across multiple dimensions: intent understanding, requirement fulfillment, response completeness, and format appropriateness.
+
+ ## Parameters
+
+ <PropertiesTable
+   content={[
+     {
+       name: "model",
+       type: "MastraLanguageModel",
+       description: "The language model to use for evaluating prompt-response alignment",
+       required: true,
+     },
+     {
+       name: "options",
+       type: "PromptAlignmentOptions",
+       description: "Configuration options for the scorer",
+       required: false,
+       children: [
+         {
+           name: "scale",
+           type: "number",
+           description: "Scale factor to multiply the final score (default: 1)",
+           required: false,
+         },
+         {
+           name: "evaluationMode",
+           type: "'user' | 'system' | 'both'",
+           description: "Evaluation mode - 'user' evaluates user prompt alignment only, 'system' evaluates system compliance only, 'both' evaluates both with weighted scoring (default: 'both')",
+           required: false,
+         },
+       ],
+     },
+   ]}
+ />
+
+ ## .run() Returns
+
+ <PropertiesTable
+   content={[
+     {
+       name: "score",
+       type: "number",
+       description: "Multi-dimensional alignment score between 0 and scale (default 0-1)",
+     },
+     {
+       name: "reason",
+       type: "string",
+       description: "Human-readable explanation of the prompt alignment evaluation with detailed breakdown",
+     },
+   ]}
+ />
+
+ ## Scoring Details
+
+ ### Multi-Dimensional Analysis
+
+ Prompt Alignment evaluates responses across four key dimensions with weighted scoring that adapts based on the evaluation mode:
+
+ #### User Mode ('user')
+ Evaluates alignment with user prompts only:
+
+ 1. **Intent Alignment** (40% weight) - Whether the response addresses the user's core request
+ 2. **Requirements Fulfillment** (30% weight) - If all user requirements are met
+ 3. **Completeness** (20% weight) - Whether the response is comprehensive for user needs
+ 4. **Response Appropriateness** (10% weight) - If format and tone match user expectations
+
+ #### System Mode ('system')
+ Evaluates compliance with system guidelines only:
+
+ 1. **Intent Alignment** (35% weight) - Whether the response follows system behavioral guidelines
+ 2. **Requirements Fulfillment** (35% weight) - If all system constraints are respected
+ 3. **Completeness** (15% weight) - Whether the response adheres to all system rules
+ 4. **Response Appropriateness** (15% weight) - If format and tone match system specifications
+
+ #### Both Mode ('both' - default)
+ Combines evaluation of both user and system alignment:
+
+ - **User alignment**: 70% of final score (using user mode weights)
+ - **System compliance**: 30% of final score (using system mode weights)
+ - Provides balanced assessment of user satisfaction and system adherence
+
+ ### Scoring Formula
+
+ **User Mode:**
+ ```
+ Weighted Score = (intent_score × 0.4) + (requirements_score × 0.3) +
+                  (completeness_score × 0.2) + (appropriateness_score × 0.1)
+ Final Score = Weighted Score × scale
+ ```
+
+ **System Mode:**
+ ```
+ Weighted Score = (intent_score × 0.35) + (requirements_score × 0.35) +
+                  (completeness_score × 0.15) + (appropriateness_score × 0.15)
+ Final Score = Weighted Score × scale
+ ```
+
+ **Both Mode (default):**
+ ```
+ User Score = (user dimensions with user weights)
+ System Score = (system dimensions with system weights)
+ Weighted Score = (User Score × 0.7) + (System Score × 0.3)
+ Final Score = Weighted Score × scale
+ ```
+
+ **Weight Distribution Rationale**:
+ - **User Mode**: Prioritizes intent (40%) and requirements (30%) for user satisfaction
+ - **System Mode**: Balances behavioral compliance (35%) and constraints (35%) equally
+ - **Both Mode**: 70/30 split ensures user needs are primary while maintaining system compliance
+
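The weighting above is plain arithmetic, so it can be sanity-checked outside the scorer. A minimal sketch (not part of the package; the dimension names and weights follow the tables above):

```typescript
// Illustrative re-implementation of the documented weighting, for intuition only.
type DimensionScores = {
  intent: number;          // each dimension scored 0-1
  requirements: number;
  completeness: number;
  appropriateness: number;
};

const USER_WEIGHTS: DimensionScores = { intent: 0.4, requirements: 0.3, completeness: 0.2, appropriateness: 0.1 };
const SYSTEM_WEIGHTS: DimensionScores = { intent: 0.35, requirements: 0.35, completeness: 0.15, appropriateness: 0.15 };

function weighted(s: DimensionScores, w: DimensionScores): number {
  return s.intent * w.intent + s.requirements * w.requirements
    + s.completeness * w.completeness + s.appropriateness * w.appropriateness;
}

function promptAlignmentScore(
  user: DimensionScores,
  system: DimensionScores,
  mode: 'user' | 'system' | 'both' = 'both',
  scale = 1,
): number {
  if (mode === 'user') return weighted(user, USER_WEIGHTS) * scale;
  if (mode === 'system') return weighted(system, SYSTEM_WEIGHTS) * scale;
  // 'both': 70% user alignment, 30% system compliance
  return (0.7 * weighted(user, USER_WEIGHTS) + 0.3 * weighted(system, SYSTEM_WEIGHTS)) * scale;
}

// e.g. strong user alignment, middling system compliance, default 'both' mode:
promptAlignmentScore(
  { intent: 1.0, requirements: 0.9, completeness: 0.8, appropriateness: 1.0 },
  { intent: 0.6, requirements: 0.6, completeness: 0.7, appropriateness: 0.8 },
); // 0.7 × 0.93 + 0.3 × 0.645 = 0.8445
```
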
+ ### Score Interpretation
+
+ - **0.9-1.0** = Excellent alignment across all dimensions
+ - **0.8-0.9** = Very good alignment with minor gaps
+ - **0.7-0.8** = Good alignment but missing some requirements or completeness
+ - **0.6-0.7** = Moderate alignment with noticeable gaps
+ - **0.4-0.6** = Poor alignment with significant issues
+ - **0.0-0.4** = Very poor alignment, response doesn't address the prompt effectively
+
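If these bands need to be operationalized, say to gate a CI evaluation run, a tiny helper is enough. A sketch assuming the default `0-1` scale (boundary values resolve to the higher band here; this is not a package API):

```typescript
// Illustrative helper mirroring the interpretation table above.
function interpretAlignment(score: number): string {
  if (score >= 0.9) return "excellent";
  if (score >= 0.8) return "very good";
  if (score >= 0.7) return "good";
  if (score >= 0.6) return "moderate";
  if (score >= 0.4) return "poor";
  return "very poor";
}

interpretAlignment(0.85); // "very good"
```
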
+ ### Comparison with Other Scorers
+
+ | Aspect | Prompt Alignment | Answer Relevancy | Faithfulness |
+ |--------|------------------|------------------|--------------|
+ | **Focus** | Multi-dimensional prompt adherence | Query-response relevance | Context groundedness |
+ | **Evaluation** | Intent, requirements, completeness, format | Semantic similarity to query | Factual consistency with context |
+ | **Use Case** | General prompt following | Information retrieval | RAG/context-based systems |
+ | **Dimensions** | 4 weighted dimensions | Single relevance dimension | Single faithfulness dimension |
+
+ ### When to Use Each Mode
+
+ **User Mode (`'user'`)** - Use when:
+ - Evaluating customer service responses for user satisfaction
+ - Testing content generation quality from user perspective
+ - Measuring how well responses address user questions
+ - Focusing purely on request fulfillment without system constraints
+
+ **System Mode (`'system'`)** - Use when:
+ - Auditing AI safety and compliance with behavioral guidelines
+ - Ensuring agents follow brand voice and tone requirements
+ - Validating adherence to content policies and constraints
+ - Testing system-level behavioral consistency
+
+ **Both Mode (`'both'`, default)** - Recommended when:
+ - Comprehensive evaluation of overall AI agent performance
+ - Balancing user satisfaction with system compliance
+ - Production monitoring where both user and system requirements matter
+ - Holistic assessment of prompt-response alignment
+
+ ## Usage Examples
+
+ ### Basic Configuration
+
+ ```typescript
+ import { openai } from '@ai-sdk/openai';
+ import { createPromptAlignmentScorerLLM } from '@mastra/evals';
+
+ const scorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o'),
+ });
+
+ // Evaluate a code generation task
+ const result = await scorer.run({
+   input: [{
+     role: 'user',
+     content: 'Write a Python function to calculate factorial with error handling'
+   }],
+   output: {
+     role: 'assistant',
+     text: `def factorial(n):
+   if n < 0:
+       raise ValueError("Factorial not defined for negative numbers")
+   if n == 0:
+       return 1
+   return n * factorial(n-1)`
+   }
+ });
+ // Result: { score: 0.95, reason: "Excellent alignment - function addresses intent, includes error handling..." }
+ ```
+
+ ### Custom Configuration Examples
+
+ ```typescript
+ // Configure scale and evaluation mode
+ const scorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o'),
+   options: {
+     scale: 10, // Score from 0-10 instead of 0-1
+     evaluationMode: 'both' // 'user', 'system', or 'both' (default)
+   },
+ });
+
+ // User-only evaluation - focus on user satisfaction
+ const userScorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o'),
+   options: { evaluationMode: 'user' }
+ });
+
+ // System-only evaluation - focus on compliance
+ const systemScorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o'),
+   options: { evaluationMode: 'system' }
+ });
+
+ const result = await scorer.run(testRun);
+ // Result: { score: 8.5, reason: "Score: 8.5 out of 10 - Good alignment with both user intent and system guidelines..." }
+ ```
+
+ ### Format-Specific Evaluation
+
+ ```typescript
+ // Evaluate bullet point formatting
+ const result = await scorer.run({
+   input: [{
+     role: 'user',
+     content: 'List the benefits of TypeScript in bullet points'
+   }],
+   output: {
+     role: 'assistant',
+     text: 'TypeScript provides static typing, better IDE support, and enhanced code reliability.'
+   }
+ });
+ // Result: Lower appropriateness score due to format mismatch (paragraph vs bullet points)
+ ```
+
+ ## Usage Patterns
+
+ ### Code Generation Evaluation
+ Ideal for evaluating:
+ - Programming task completion
+ - Code quality and completeness
+ - Adherence to coding requirements
+ - Format specifications (functions, classes, etc.)
+
+ ```typescript
+ // Example: API endpoint creation
+ const codePrompt = "Create a REST API endpoint with authentication and rate limiting";
+ // Scorer evaluates: intent (API creation), requirements (auth + rate limiting),
+ // completeness (full implementation), format (code structure)
+ ```
+
+ ### Instruction Following Assessment
+ Perfect for:
+ - Task completion verification
+ - Multi-step instruction adherence
+ - Requirement compliance checking
+ - Educational content evaluation
+
+ ```typescript
+ // Example: Multi-requirement task
+ const taskPrompt = "Write a Python class with initialization, validation, error handling, and documentation";
+ // Scorer tracks each requirement individually and provides detailed breakdown
+ ```
+
+ ### Content Format Validation
+ Useful for:
+ - Format specification compliance
+ - Style guide adherence
+ - Output structure verification
+ - Response appropriateness checking
+
+ ```typescript
+ // Example: Structured output
+ const formatPrompt = "Explain the differences between let and const in JavaScript using bullet points";
+ // Scorer evaluates content accuracy AND format compliance
+ ```
+
+ ## Common Use Cases
+
+ ### 1. Agent Response Quality
+ Measure how well your AI agents follow user instructions:
+
+ ```typescript
+ const agent = new Agent({
+   name: 'CodingAssistant',
+   instructions: 'You are a helpful coding assistant. Always provide working code examples.',
+   model: openai('gpt-4o'),
+ });
+
+ // Evaluate comprehensive alignment (default)
+ const scorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o-mini'),
+   options: { evaluationMode: 'both' } // Evaluates both user intent and system guidelines
+ });
+
+ // Evaluate just user satisfaction
+ const userScorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o-mini'),
+   options: { evaluationMode: 'user' } // Focus only on user request fulfillment
+ });
+
+ // Evaluate system compliance
+ const systemScorer = createPromptAlignmentScorerLLM({
+   model: openai('gpt-4o-mini'),
+   options: { evaluationMode: 'system' } // Check adherence to system instructions
+ });
+
+ const result = await scorer.run(agentRun);
+ ```
+
+ ### 2. Prompt Engineering Optimization
+ Test different prompts to improve alignment:
+
+ ```typescript
+ const prompts = [
+   'Write a function to calculate factorial',
+   'Create a Python function that calculates factorial with error handling for negative inputs',
+   'Implement a factorial calculator in Python with: input validation, error handling, and docstring'
+ ];
+
+ // Compare alignment scores to find the best prompt
+ for (const prompt of prompts) {
+   const result = await scorer.run(createTestRun(prompt, response));
+   console.log(`Prompt alignment: ${result.score}`);
+ }
+ ```
+
+ ### 3. Multi-Agent System Evaluation
+ Compare different agents or models:
+
+ ```typescript
+ const agents = [agent1, agent2, agent3];
+ const testPrompts = [...]; // Array of test prompts
+
+ for (const agent of agents) {
+   let totalScore = 0;
+   for (const prompt of testPrompts) {
+     const response = await agent.generate(prompt);
+     const evaluation = await scorer.run({
+       input: [{ role: 'user', content: prompt }],
+       output: { role: 'assistant', text: response.text }
+     });
+     totalScore += evaluation.score;
+   }
+   console.log(`${agent.name} average alignment: ${totalScore / testPrompts.length}`);
+ }
+ ```
+
+ ## Error Handling
+
+ The scorer handles various edge cases gracefully:
+
+ ```typescript
+ // Missing user prompt
+ try {
+   await scorer.run({ input: [], output: response });
+ } catch (error) {
+   // Error: "Both user prompt and agent response are required for prompt alignment scoring"
+ }
+
+ // Empty response
+ const result = await scorer.run({
+   input: [userMessage],
+   output: { role: 'assistant', text: '' }
+ });
+ // Returns low scores with detailed reasoning about incompleteness
+ ```
+
+ ## Related
+
+ - [Answer Relevancy Scorer](/reference/scorers/answer-relevancy) - Evaluates query-response relevance
+ - [Faithfulness Scorer](/reference/scorers/faithfulness) - Measures context groundedness
+ - [Tool Call Accuracy Scorer](/reference/scorers/tool-call-accuracy) - Evaluates tool selection
+ - [Custom Scorers](/docs/scorers/custom-scorers) - Creating your own evaluation metrics
package/.docs/raw/scorers/off-the-shelf-scorers.mdx CHANGED
@@ -20,6 +20,7 @@ These scorers evaluate how correct, truthful, and complete your agent's answers
  - [`content-similarity`](/reference/scorers/content-similarity): Measures textual similarity using character-level matching (`0-1`, higher is better)
  - [`textual-difference`](/reference/scorers/textual-difference): Measures textual differences between strings (`0-1`, higher means more similar)
  - [`tool-call-accuracy`](/reference/scorers/tool-call-accuracy): Evaluates whether the LLM selects the correct tool from available options (`0-1`, higher is better)
+ - [`prompt-alignment`](/reference/scorers/prompt-alignment): Measures how well agent responses align with user prompt intent, requirements, completeness, and format (`0-1`, higher is better)

  ### Context Quality

@@ -28,14 +29,13 @@ These scorers evaluate the quality and relevance of context used in generating r
  - [`context-precision`](/reference/scorers/context-precision): Evaluates context relevance and ranking using Mean Average Precision, rewarding early placement of relevant context (`0-1`, higher is better)
  - [`context-relevance`](/reference/scorers/context-relevance): Measures context utility with nuanced relevance levels, usage tracking, and missing context detection (`0-1`, higher is better)

- :::tip Context Scorer Selection
+ > tip Context Scorer Selection
  - Use **Context Precision** when context ordering matters and you need standard IR metrics (ideal for RAG ranking evaluation)
  - Use **Context Relevance** when you need detailed relevance assessment and want to track context usage and identify gaps

  Both context scorers support:
  - **Static context**: Pre-defined context arrays
  - **Dynamic context extraction**: Extract context from runs using custom functions (ideal for RAG systems, vector databases, etc.)
- :::

  ### Output Quality

package/.docs/raw/streaming/overview.mdx CHANGED
@@ -15,8 +15,8 @@ Mastra supports real-time, incremental responses from agents and workflows, allo

  Mastra currently supports two streaming methods; this page explains how to use `streamVNext()`.

- 1. **`.stream()`**: Current stable API, supports **AI SDK v1**.
- 2. **`.streamVNext()`**: Experimental API, supports **AI SDK v2**.
+ 1. **`.stream()`**: Current stable API, supports **AI SDK v4** (`LanguageModelV1`).
+ 2. **`.streamVNext()`**: Experimental API, supports **AI SDK v5** (`LanguageModelV2`).

  ## Streaming with agents
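
For orientation, the experimental API is invoked the same way as `stream()`. A minimal sketch (assumptions: `testAgent` is a registered Mastra agent backed by an AI SDK v5 model, and the result exposes a `textStream` like `stream()` does):

```typescript
// Hedged sketch of streaming an agent response with the experimental API.
const stream = await testAgent.streamVNext("What is the weather in London?");

for await (const chunk of stream.textStream) {
  process.stdout.write(chunk); // print text incrementally as it arrives
}
```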
package/.docs/raw/streaming/tool-streaming.mdx CHANGED
@@ -3,6 +3,8 @@ title: "Tool Streaming | Streaming | Mastra"
  description: "Learn how to use tool streaming in Mastra, including handling tool calls, tool results, and tool execution events during streaming."
  ---

+ import { Callout } from "nextra/components";
+
  # Tool streaming

  Tool streaming in Mastra enables tools to send incremental results while they run, rather than waiting until execution finishes. This allows you to surface partial progress, intermediate states, or progressive data directly to users or upstream agents and workflows.
@@ -36,6 +38,10 @@ export const testAgent = new Agent({

  The `writer` argument is passed to a tool’s `execute` function and can be used to emit custom events, data, or values into the active stream. This enables tools to provide intermediate results or status updates while execution is still in progress.

+ <Callout type="warning">
+   You must `await` the call to `writer.write(...)` or else you will lock the stream and get a `WritableStream is locked` error.
+ </Callout>
+
  ```typescript {5,8,15} showLineNumbers copy
  import { createTool } from "@mastra/core/tools";

@@ -44,14 +50,14 @@ export const testTool = createTool({
    execute: async ({ context, writer }) => {
      const { value } = context;

-     writer?.write({
+     await writer?.write({
        type: "custom-event",
        status: "pending"
      });

      const response = await fetch(...);

-     writer?.write({
+     await writer?.write({
        type: "custom-event",
        status: "success"
      });
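
The `WritableStream is locked` error called out above comes from the Web Streams locking model rather than anything Mastra-specific. A standalone illustration (standard API only, no Mastra imports):

```typescript
// A WritableStream allows only one writer at a time; while a writer holds the
// lock, acquiring another one throws - the "locked" state the warning refers to.
const stream = new WritableStream();
const writer = stream.getWriter();
const pending = writer.write("chunk"); // write() returns a promise

try {
  stream.getWriter(); // throws: the stream is already locked to a writer
} catch (err) {
  console.error(err);
}

await pending; // awaiting each write preserves ordering and backpressure
writer.releaseLock();
```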
package/.docs/raw/streaming/workflow-streaming.mdx CHANGED
@@ -3,6 +3,8 @@ title: "Workflow Streaming | Streaming | Mastra"
  description: "Learn how to use workflow streaming in Mastra, including handling workflow execution events, step streaming, and workflow integration with agents and tools."
  ---

+ import { Callout } from "nextra/components";
+
  # Workflow streaming

  Workflow streaming in Mastra enables workflows to send incremental results while they execute, rather than waiting until completion. This allows you to surface partial progress, intermediate states, or progressive data directly to users or upstream agents and workflows.
@@ -18,6 +20,10 @@ By combining writable workflow streams with agent streaming, you gain fine-grain

  The `writer` argument is passed to a workflow step's `execute` function and can be used to emit custom events, data, or values into the active stream. This enables workflow steps to provide intermediate results or status updates while execution is still in progress.

+ <Callout type="warning">
+   You must `await` the call to `writer.write(...)` or else you will lock the stream and get a `WritableStream is locked` error.
+ </Callout>
+
  ```typescript {5,8,15} showLineNumbers copy
  import { createStep } from "@mastra/core/workflows";

@@ -26,14 +32,14 @@ export const testStep = createStep({
    execute: async ({ inputData, writer }) => {
      const { value } = inputData;

-     writer?.write({
+     await writer?.write({
        type: "custom-event",
        status: "pending"
      });

      const response = await fetch(...);

-     writer?.write({
+     await writer?.write({
        type: "custom-event",
        status: "success"
      });
package/.docs/raw/tools-mcp/overview.mdx CHANGED
@@ -3,6 +3,8 @@ title: "Tools Overview | Tools & MCP | Mastra Docs"
  description: Understand what tools are in Mastra, how to add them to agents, and best practices for designing effective tools.
  ---

+ import { Steps } from "nextra/components";
+
  # Tools Overview

  Tools are functions that agents can execute to perform specific tasks or access external information. They extend an agent's capabilities beyond simple text generation, allowing interaction with APIs, databases, or other systems.
@@ -65,3 +67,45 @@ Some providers that we include this layer for:
  - **DeepSeek & Meta:** Apply similar compatibility logic to ensure schema alignment and tool usability.

  This approach makes tool usage more reliable and model-agnostic for both custom and MCP tools.
+
+
+ ## Testing tools locally
+ There are two ways to run and test tools.
+
+ <Steps>
+
+ ### Mastra Playground
+
+ With the Mastra Dev Server running you can test a tool from the Mastra Playground by visiting [http://localhost:4111/tools](http://localhost:4111/tools) in your browser.
+
+ > For more information, see the [Local Dev Playground](/docs/server-db/local-dev-playground) documentation.
+
+ ### Command line
+
+ Invoke a tool using `.execute()`.
+
+ ```typescript filename="src/test-tool.ts" showLineNumbers copy
+ import { RuntimeContext } from "@mastra/core/runtime-context";
+ import { testTool } from "./mastra/tools/test-tool";
+
+ const runtimeContext = new RuntimeContext();
+
+ const result = await testTool.execute({
+   context: {
+     value: "foo"
+   },
+   runtimeContext
+ });
+
+ console.log(result);
+ ```
+
+ > See [createTool()](../../reference/tools/create-tool.mdx) for more information.
+
+ To test this tool, run the following:
+
+ ```bash copy
+ npx tsx src/test-tool.ts
+ ```
+
+ </Steps>
package/.docs/raw/workflows/overview.mdx CHANGED
@@ -130,7 +130,7 @@ export const testWorkflow = createWorkflow({
  .commit();
  ```

- ### Register workflow
+ ## Register workflow

  Register a workflow using `workflows` in the main Mastra instance:

@@ -154,20 +154,24 @@ export const mastra = new Mastra({
  });
  ```

- ### Run workflow
+ ## Testing workflows locally
  There are two ways to run and test workflows.

  <Steps>

- #### Mastra Playground
+ ### Mastra Playground

  With the Mastra Dev Server running you can run the workflow from the Mastra Playground by visiting [http://localhost:4111/workflows](http://localhost:4111/workflows) in your browser.

- #### Command line
+ > For more information, see the [Local Dev Playground](/docs/server-db/local-dev-playground) documentation.

- Create a run instance of any Mastra workflow using `createRunAsync` and `start`:
+ ### Command line
+
+ Create a workflow run instance using `createRunAsync` and `start`:

  ```typescript {3,5} filename="src/test-workflow.ts" showLineNumbers copy
+ import "dotenv/config";
+
  import { mastra } from "./mastra";

  const run = await mastra.getWorkflow("testWorkflow").createRunAsync();
@@ -178,15 +182,13 @@ const result = await run.start({
  }
  });

- // Dump the complete workflow result (includes status, steps and result)
- console.log(JSON.stringify(result, null, 2));
+ console.log(result);

- // Get the workflow output value
  if (result.status === 'success') {
-   console.log(`output value: ${result.result.output}`);
+   console.log(result.result.output);
  }
  ```
- > see [createRunAsync](/reference/workflows/create-run) and [start](/reference/workflows/run-methods/start) for more information.
+ > see [createRunAsync](../../reference/workflows/create-run.mdx) and [start](../../reference/workflows/run-methods/start.mdx) for more information.

  To trigger this workflow, run the following:

@@ -196,11 +198,11 @@ npx tsx src/test-workflow.ts

  </Steps>

- #### Run workflow results
+ ### Run workflow results

  The result of running a workflow using either `start()` or `resume()` will look like one of the following, depending on the outcome.

- ##### Status success
+ #### Status success

  ```json
  {
@@ -224,7 +226,7 @@ The result of running a workflow using either `start()` or `resume()` will look
  - **result**: Includes the final output of the workflow, typed according to the `outputSchema`


- ##### Status suspended
+ #### Status suspended

  ```json
  {
@@ -246,7 +248,7 @@ The result of running a workflow using either `start()` or `resume()` will look

  - **suspended**: An optional array listing any steps currently awaiting input before continuing

- ##### Status failed
+ #### Status failed

  ```json
  {
@@ -264,7 +266,7 @@ The result of running a workflow using either `start()` or `resume()` will look
  ```
  - **error**: An optional field that includes the error message if the workflow fails

- ### Stream workflow
+ ## Stream workflow

  Similar to the run method shown above, workflows can also be streamed:

@@ -286,7 +288,7 @@ for await (const chunk of result.stream) {

  > See [stream](/reference/workflows/run-methods/stream) and [messages](/reference/workflows/run-methods/stream#messages) for more information.

- ### Watch Workflow
+ ## Watch Workflow

  A workflow can also be watched, allowing you to inspect each event that is emitted.

@@ -308,7 +310,7 @@ const result = await run.start({

  > See [watch](/reference/workflows/run-methods/watch) for more information.

- ## More resources
+ ## Related

  - The [Workflow Guide](../../guides/guide/ai-recruiter.mdx) in the Guides section is a tutorial that covers the main concepts.
  - [Parallel Steps workflow example](../../examples/workflows/parallel-steps.mdx)
package/dist/stdio.js CHANGED
@@ -798,7 +798,11 @@ async function listDirContents(dirPath) {
  }
  }
  async function readMdxContent(docPath, queryKeywords) {
-   const fullPath = path3__default.join(docsBaseDir, docPath);
+   const fullPath = path3__default.resolve(path3__default.join(docsBaseDir, docPath));
+   if (!fullPath.startsWith(path3__default.resolve(docsBaseDir))) {
+     void logger.error(`Path traversal attempt detected`);
+     return { found: false };
+   }
    void logger.debug(`Reading MDX content from: ${fullPath}`);
    try {
      const stats = await fs3.stat(fullPath);
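
The added guard follows the usual resolve-then-prefix-check pattern: resolve the joined path, then require it to stay under the docs base. A standalone sketch of the same idea (illustrative names, not the package's internals):

```typescript
import path from "node:path";

// Resolve the requested path and reject anything that escapes the base directory.
function resolveDocPath(docsBaseDir: string, docPath: string): string | null {
  const base = path.resolve(docsBaseDir);
  const fullPath = path.resolve(path.join(base, docPath));
  // Mirrors the patch: a prefix check against the resolved base rejects
  // inputs like "../../etc/passwd" that resolve outside the docs directory.
  return fullPath.startsWith(base) ? fullPath : null;
}

console.log(resolveDocPath("/srv/docs", "guide.mdx"));     // /srv/docs/guide.mdx
console.log(resolveDocPath("/srv/docs", "../etc/passwd")); // null
```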
package/dist/tools/docs.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"docs.d.ts","sourceRoot":"","sources":["../../src/tools/docs.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA4JxB,eAAO,MAAM,eAAe;;;;;;;;;EAW1B,CAAC;AAEH,MAAM,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,eAAe,CAAC,CAAC;AAExD,eAAO,MAAM,QAAQ;;;;;;;;;;;;;oBAkBG,SAAS;CAiDhC,CAAC"}
+ {"version":3,"file":"docs.d.ts","sourceRoot":"","sources":["../../src/tools/docs.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAgKxB,eAAO,MAAM,eAAe;;;;;;;;;EAW1B,CAAC;AAEH,MAAM,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,eAAe,CAAC,CAAC;AAExD,eAAO,MAAM,QAAQ;;;;;;;;;;;;;oBAkBG,SAAS;CAiDhC,CAAC"}