@mastra/mcp-docs-server 1.1.2 → 1.1.3-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/processors.md +52 -0
- package/.docs/docs/observability/datasets/overview.md +188 -0
- package/.docs/docs/observability/datasets/running-experiments.md +266 -0
- package/.docs/docs/observability/tracing/exporters/cloud.md +7 -4
- package/.docs/docs/server/server-adapters.md +5 -4
- package/.docs/guides/deployment/amazon-ec2.md +47 -24
- package/.docs/guides/deployment/netlify.md +5 -5
- package/.docs/guides/deployment/vercel.md +3 -3
- package/.docs/guides/getting-started/next-js.md +4 -4
- package/.docs/models/gateways/openrouter.md +5 -1
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/alibaba-cn.md +3 -1
- package/.docs/models/providers/alibaba.md +3 -1
- package/.docs/models/providers/cerebras.md +1 -1
- package/.docs/models/providers/chutes.md +7 -4
- package/.docs/models/providers/cloudflare-ai-gateway.md +17 -119
- package/.docs/models/providers/fireworks-ai.md +2 -1
- package/.docs/models/providers/firmware.md +2 -7
- package/.docs/models/providers/huggingface.md +4 -1
- package/.docs/models/providers/kilo.md +333 -0
- package/.docs/models/providers/kuae-cloud-coding-plan.md +71 -0
- package/.docs/models/providers/minimax-cn-coding-plan.md +8 -7
- package/.docs/models/providers/minimax-cn.md +8 -7
- package/.docs/models/providers/minimax-coding-plan.md +8 -7
- package/.docs/models/providers/minimax.md +8 -7
- package/.docs/models/providers/nano-gpt.md +5 -1
- package/.docs/models/providers/nebius.md +2 -2
- package/.docs/models/providers/novita-ai.md +2 -1
- package/.docs/models/providers/nvidia.md +3 -2
- package/.docs/models/providers/ollama-cloud.md +2 -1
- package/.docs/models/providers/opencode.md +4 -1
- package/.docs/models/providers/poe.md +1 -1
- package/.docs/models/providers/siliconflow-cn.md +3 -8
- package/.docs/models/providers/siliconflow.md +4 -9
- package/.docs/models/providers/stepfun.md +73 -0
- package/.docs/models/providers/togetherai.md +7 -5
- package/.docs/models/providers/vivgrid.md +7 -4
- package/.docs/models/providers/zai-coding-plan.md +13 -12
- package/.docs/models/providers/zenmux.md +5 -2
- package/.docs/models/providers.md +3 -1
- package/.docs/reference/agents/generate.md +1 -1
- package/.docs/reference/configuration.md +3 -4
- package/.docs/reference/datasets/addItem.md +35 -0
- package/.docs/reference/datasets/addItems.md +33 -0
- package/.docs/reference/datasets/compareExperiments.md +48 -0
- package/.docs/reference/datasets/create.md +49 -0
- package/.docs/reference/datasets/dataset.md +78 -0
- package/.docs/reference/datasets/datasets-manager.md +84 -0
- package/.docs/reference/datasets/delete.md +23 -0
- package/.docs/reference/datasets/deleteExperiment.md +25 -0
- package/.docs/reference/datasets/deleteItem.md +25 -0
- package/.docs/reference/datasets/deleteItems.md +27 -0
- package/.docs/reference/datasets/get.md +29 -0
- package/.docs/reference/datasets/getDetails.md +45 -0
- package/.docs/reference/datasets/getExperiment.md +28 -0
- package/.docs/reference/datasets/getItem.md +31 -0
- package/.docs/reference/datasets/getItemHistory.md +29 -0
- package/.docs/reference/datasets/list.md +29 -0
- package/.docs/reference/datasets/listExperimentResults.md +37 -0
- package/.docs/reference/datasets/listExperiments.md +31 -0
- package/.docs/reference/datasets/listItems.md +44 -0
- package/.docs/reference/datasets/listVersions.md +31 -0
- package/.docs/reference/datasets/startExperiment.md +60 -0
- package/.docs/reference/datasets/startExperimentAsync.md +41 -0
- package/.docs/reference/datasets/update.md +46 -0
- package/.docs/reference/datasets/updateItem.md +36 -0
- package/.docs/reference/index.md +24 -0
- package/.docs/reference/memory/observational-memory.md +36 -0
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +1 -1
- package/.docs/reference/processors/processor-interface.md +4 -0
- package/.docs/reference/server/koa-adapter.md +38 -0
- package/.docs/reference/streaming/agents/stream.md +1 -1
- package/.docs/reference/tools/create-tool.md +1 -1
- package/.docs/reference/workspace/e2b-sandbox.md +0 -11
- package/CHANGELOG.md +14 -0
- package/package.json +5 -5
|
@@ -342,6 +342,58 @@ export class CustomOutputProcessor implements Processor {
|
|
|
342
342
|
}
|
|
343
343
|
```
|
|
344
344
|
|
|
345
|
+
#### Emitting custom stream events with writer
|
|
346
|
+
|
|
347
|
+
Output processors receive a `writer` object that lets you emit custom data chunks back to the client during streaming. This is useful for use cases like streaming moderation results or sending UI update signals without blocking the original stream.
|
|
348
|
+
|
|
349
|
+
```typescript
|
|
350
|
+
import type { Processor, ChunkType, MastraDBMessage } from "@mastra/core";
|
|
351
|
+
|
|
352
|
+
export class ModerationProcessor implements Processor {
|
|
353
|
+
id = "moderation";
|
|
354
|
+
|
|
355
|
+
async processOutputResult({ messages, writer }) {
|
|
356
|
+
// Run moderation on the final output
|
|
357
|
+
const text = messages
|
|
358
|
+
.filter((m) => m.role === "assistant")
|
|
359
|
+
.flatMap((m) => m.content.parts?.filter((p) => p.type === "text"))
|
|
360
|
+
.map((p) => p.text)
|
|
361
|
+
.join(" ");
|
|
362
|
+
|
|
363
|
+
const result = await runModeration(text);
|
|
364
|
+
|
|
365
|
+
if (result.requiresChange) {
|
|
366
|
+
// Emit a custom event to the client with the moderated text
|
|
367
|
+
await writer?.custom({
|
|
368
|
+
type: "data-moderation-update",
|
|
369
|
+
data: {
|
|
370
|
+
originalText: text,
|
|
371
|
+
moderatedText: result.moderatedText,
|
|
372
|
+
reason: result.reason,
|
|
373
|
+
},
|
|
374
|
+
});
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
return messages;
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
On the client, listen for the custom chunk type in the stream:
|
|
383
|
+
|
|
384
|
+
```typescript
|
|
385
|
+
const stream = await agent.stream("Hello");
|
|
386
|
+
|
|
387
|
+
for await (const chunk of stream.fullStream) {
|
|
388
|
+
if (chunk.type === "data-moderation-update") {
|
|
389
|
+
// Update the UI with moderated text
|
|
390
|
+
updateDisplayedMessage(chunk.data.moderatedText);
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
Custom chunk types must use the `data-` prefix (e.g., `data-moderation-update`, `data-status`).
|
|
396
|
+
|
|
345
397
|
#### Adding metadata in output processors
|
|
346
398
|
|
|
347
399
|
You can add custom metadata to messages in `processOutputResult`. This metadata is accessible via the response object:
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Datasets Overview
|
|
2
|
+
|
|
3
|
+
**Added in:** `@mastra/core@1.4.0`
|
|
4
|
+
|
|
5
|
+
Datasets are collections of test cases that you run experiments against to measure how well your agents and workflows perform. Each mutation creates a new version, so you can reproduce past experiments exactly. Pair datasets with [scorers](https://mastra.ai/docs/evals/overview) to track quality across prompts, models, or code changes.
|
|
6
|
+
|
|
7
|
+
## Usage
|
|
8
|
+
|
|
9
|
+
### Configure storage
|
|
10
|
+
|
|
11
|
+
Configure storage in your Mastra instance. Datasets require a storage adapter that provides the `datasets` domain:
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { Mastra } from "@mastra/core";
|
|
15
|
+
import { LibSQLStore } from "@mastra/libsql";
|
|
16
|
+
|
|
17
|
+
export const mastra = new Mastra({
|
|
18
|
+
storage: new LibSQLStore({
|
|
19
|
+
id: "my-store",
|
|
20
|
+
url: "file:./mastra.db",
|
|
21
|
+
}),
|
|
22
|
+
});
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Accessing the datasets API
|
|
26
|
+
|
|
27
|
+
All dataset operations are available through `mastra.datasets`:
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
const datasets = mastra.datasets;
|
|
31
|
+
|
|
32
|
+
// Create a dataset
|
|
33
|
+
const dataset = await datasets.create({ name: "my-dataset" });
|
|
34
|
+
|
|
35
|
+
// Retrieve an existing dataset
|
|
36
|
+
const existing = await datasets.get({ id: "dataset-id" });
|
|
37
|
+
|
|
38
|
+
// List all datasets
|
|
39
|
+
const { datasets: all } = await datasets.list();
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
> **Info:** Visit the [`DatasetsManager` reference](https://mastra.ai/reference/datasets/datasets-manager) for the full list of methods.
|
|
43
|
+
|
|
44
|
+
## Creating a dataset
|
|
45
|
+
|
|
46
|
+
Call [`create()`](https://mastra.ai/reference/datasets/create) with a name and optional description:
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
import { mastra } from "../index";
|
|
50
|
+
|
|
51
|
+
const dataset = await mastra.datasets.create({
|
|
52
|
+
name: "translation-pairs",
|
|
53
|
+
description: "English to Spanish translation test cases",
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
console.log(dataset.id); // auto-generated UUID
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Defining schemas
|
|
60
|
+
|
|
61
|
+
You can enforce the shape of `input` and `groundTruth` by passing Zod schemas. Mastra converts them to JSON Schema at creation time:
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
import { z } from "zod";
|
|
65
|
+
import { mastra } from "../index";
|
|
66
|
+
|
|
67
|
+
const dataset = await mastra.datasets.create({
|
|
68
|
+
name: "translation-pairs",
|
|
69
|
+
inputSchema: z.object({
|
|
70
|
+
text: z.string(),
|
|
71
|
+
sourceLang: z.string(),
|
|
72
|
+
targetLang: z.string(),
|
|
73
|
+
}),
|
|
74
|
+
groundTruthSchema: z.object({
|
|
75
|
+
translation: z.string(),
|
|
76
|
+
}),
|
|
77
|
+
});
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Items that don't match the schema are rejected at insert time.
|
|
81
|
+
|
|
82
|
+
## Adding items
|
|
83
|
+
|
|
84
|
+
Use [`addItem()`](https://mastra.ai/reference/datasets/addItem) for a single item or [`addItems()`](https://mastra.ai/reference/datasets/addItems) to insert in bulk:
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
// Single item
|
|
88
|
+
await dataset.addItem({
|
|
89
|
+
input: { text: "Hello", sourceLang: "en", targetLang: "es" },
|
|
90
|
+
groundTruth: { translation: "Hola" },
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
// Bulk insert
|
|
94
|
+
await dataset.addItems({
|
|
95
|
+
items: [
|
|
96
|
+
{
|
|
97
|
+
input: { text: "Goodbye", sourceLang: "en", targetLang: "es" },
|
|
98
|
+
groundTruth: { translation: "Adiós" },
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
input: { text: "Thank you", sourceLang: "en", targetLang: "es" },
|
|
102
|
+
groundTruth: { translation: "Gracias" },
|
|
103
|
+
},
|
|
104
|
+
],
|
|
105
|
+
});
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Updating and deleting items
|
|
109
|
+
|
|
110
|
+
[`updateItem()`](https://mastra.ai/reference/datasets/updateItem), [`deleteItem()`](https://mastra.ai/reference/datasets/deleteItem), and [`deleteItems()`](https://mastra.ai/reference/datasets/deleteItems) let you modify or remove existing items by `itemId`:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
await dataset.updateItem({
|
|
114
|
+
itemId: "item-abc-123",
|
|
115
|
+
groundTruth: { translation: "¡Hola!" },
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
await dataset.deleteItem({ itemId: "item-abc-123" });
|
|
119
|
+
|
|
120
|
+
await dataset.deleteItems({ itemIds: ["item-1", "item-2"] });
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Listing and searching items
|
|
124
|
+
|
|
125
|
+
[`listItems()`](https://mastra.ai/reference/datasets/listItems) supports pagination and full-text search:
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
// Paginated list
|
|
129
|
+
const { items, pagination } = await dataset.listItems({
|
|
130
|
+
page: 0,
|
|
131
|
+
perPage: 50,
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
// Full-text search
|
|
135
|
+
const { items: matches } = await dataset.listItems({
|
|
136
|
+
search: "Hello",
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
// List items at a specific version
|
|
140
|
+
const v2Items = await dataset.listItems({ version: 2 });
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Versioning
|
|
144
|
+
|
|
145
|
+
Every mutation to a dataset's items (add, update, or delete) bumps the dataset version. This lets you pin experiments to a specific snapshot of the data.
|
|
146
|
+
|
|
147
|
+
### Listing versions
|
|
148
|
+
|
|
149
|
+
Use [`listVersions()`](https://mastra.ai/reference/datasets/listVersions) to see the paginated history of versions:
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
const { versions, pagination } = await dataset.listVersions();
|
|
153
|
+
|
|
154
|
+
for (const v of versions) {
|
|
155
|
+
console.log(`Version ${v.version} — created ${v.createdAt}`);
|
|
156
|
+
}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Viewing item history
|
|
160
|
+
|
|
161
|
+
See how a specific item changed across versions by calling [`getItemHistory()`](https://mastra.ai/reference/datasets/getItemHistory) with the `itemId`:
|
|
162
|
+
|
|
163
|
+
```typescript
|
|
164
|
+
const history = await dataset.getItemHistory({ itemId: "item-abc-123" });
|
|
165
|
+
|
|
166
|
+
for (const row of history) {
|
|
167
|
+
console.log(`Version ${row.datasetVersion}`, row.input, row.groundTruth);
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Pinning to a version
|
|
172
|
+
|
|
173
|
+
Fetch the exact items that existed at a past version:
|
|
174
|
+
|
|
175
|
+
```typescript
|
|
176
|
+
const items = await dataset.listItems({ version: 2 });
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
You can also pin experiments to a version; see [running experiments](https://mastra.ai/docs/observability/datasets/running-experiments).
|
|
180
|
+
|
|
181
|
+
> **Info:** Visit the [`Dataset` reference](https://mastra.ai/reference/datasets/dataset) for the full list of methods and parameters.
|
|
182
|
+
|
|
183
|
+
## Related
|
|
184
|
+
|
|
185
|
+
- [Running experiments](https://mastra.ai/docs/observability/datasets/running-experiments)
|
|
186
|
+
- [Scorers overview](https://mastra.ai/docs/evals/overview)
|
|
187
|
+
- [DatasetsManager reference](https://mastra.ai/reference/datasets/datasets-manager)
|
|
188
|
+
- [Dataset reference](https://mastra.ai/reference/datasets/dataset)
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
# Running Experiments
|
|
2
|
+
|
|
3
|
+
**Added in:** `@mastra/core@1.4.0`
|
|
4
|
+
|
|
5
|
+
An experiment runs every item in a dataset through a target (an agent, a workflow, or a scorer) and then optionally scores the outputs. Use a scorer as the target when you want to evaluate an LLM judge itself. Results are persisted to storage so you can compare runs across different prompts, models, or code changes.
|
|
6
|
+
|
|
7
|
+
## Basic experiment
|
|
8
|
+
|
|
9
|
+
Call [`startExperiment()`](https://mastra.ai/reference/datasets/startExperiment) with a target and scorers:
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
import { mastra } from "../index";
|
|
13
|
+
|
|
14
|
+
const dataset = await mastra.datasets.get({ id: "translation-dataset-id" });
|
|
15
|
+
|
|
16
|
+
const summary = await dataset.startExperiment({
|
|
17
|
+
name: "gpt-5.1-baseline",
|
|
18
|
+
targetType: "agent",
|
|
19
|
+
targetId: "translation-agent",
|
|
20
|
+
scorers: ["accuracy", "fluency"],
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
console.log(summary.status); // 'completed' | 'failed'
|
|
24
|
+
console.log(summary.succeededCount); // number of items that ran successfully
|
|
25
|
+
console.log(summary.failedCount); // number of items that failed
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
`startExperiment()` blocks until all items finish. For fire-and-forget execution, see [async experiments](#async-experiments).
|
|
29
|
+
|
|
30
|
+
## Experiment targets
|
|
31
|
+
|
|
32
|
+
You can point an experiment at a registered agent, workflow, or scorer.
|
|
33
|
+
|
|
34
|
+
### Registered agent
|
|
35
|
+
|
|
36
|
+
Point to an agent registered on your Mastra instance:
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
const summary = await dataset.startExperiment({
|
|
40
|
+
name: "agent-v2-eval",
|
|
41
|
+
targetType: "agent",
|
|
42
|
+
targetId: "translation-agent",
|
|
43
|
+
scorers: ["accuracy"],
|
|
44
|
+
});
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Each item's `input` is passed directly to `agent.generate()`, so it must be a `string`, `string[]`, or `CoreMessage[]`.
|
|
48
|
+
|
|
49
|
+
### Registered workflow
|
|
50
|
+
|
|
51
|
+
Point to a workflow registered on your Mastra instance:
|
|
52
|
+
|
|
53
|
+
```typescript
|
|
54
|
+
const summary = await dataset.startExperiment({
|
|
55
|
+
name: "workflow-eval",
|
|
56
|
+
targetType: "workflow",
|
|
57
|
+
targetId: "translation-workflow",
|
|
58
|
+
scorers: ["accuracy"],
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
The workflow receives each item's `input` as its trigger data.
|
|
63
|
+
|
|
64
|
+
### Registered scorer
|
|
65
|
+
|
|
66
|
+
Point to a scorer to evaluate an LLM judge against ground truth:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
const summary = await dataset.startExperiment({
|
|
70
|
+
name: "judge-accuracy-eval",
|
|
71
|
+
targetType: "scorer",
|
|
72
|
+
targetId: "accuracy",
|
|
73
|
+
});
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The scorer receives each item's `input` and `groundTruth`. LLM-based judges can drift over time as underlying models change, so it's important to periodically realign them against known-good labels. A dataset gives you a stable benchmark to detect that drift.
|
|
77
|
+
|
|
78
|
+
## Scoring results
|
|
79
|
+
|
|
80
|
+
Scorers automatically run after each item's target execution. Pass scorer instances or registered scorer IDs:
|
|
81
|
+
|
|
82
|
+
**Scorer IDs**:
|
|
83
|
+
|
|
84
|
+
```typescript
|
|
85
|
+
// Reference scorers registered on the Mastra instance
|
|
86
|
+
const summary = await dataset.startExperiment({
|
|
87
|
+
name: "with-registered-scorers",
|
|
88
|
+
targetType: "agent",
|
|
89
|
+
targetId: "translation-agent",
|
|
90
|
+
scorers: ["accuracy", "fluency"],
|
|
91
|
+
});
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Scorer instances**:
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
import { createAnswerRelevancyScorer } from "@mastra/evals/scorers/prebuilt";
|
|
98
|
+
|
|
99
|
+
const relevancy = createAnswerRelevancyScorer({ model: "openai/gpt-4.1-nano" });
|
|
100
|
+
|
|
101
|
+
const summary = await dataset.startExperiment({
|
|
102
|
+
name: "with-scorer-instances",
|
|
103
|
+
targetType: "agent",
|
|
104
|
+
targetId: "translation-agent",
|
|
105
|
+
scorers: [relevancy],
|
|
106
|
+
});
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Each item's results include per-scorer scores:
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
for (const item of summary.results) {
|
|
113
|
+
console.log(item.itemId, item.output);
|
|
114
|
+
for (const score of item.scores) {
|
|
115
|
+
console.log(` ${score.scorerName}: ${score.score} — ${score.reason}`);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
> **Info:** Visit the [Scorers overview](https://mastra.ai/docs/evals/overview) for details on available and custom scorers.
|
|
121
|
+
|
|
122
|
+
## Async experiments
|
|
123
|
+
|
|
124
|
+
`startExperiment()` blocks until every item completes. For long-running datasets, use [`startExperimentAsync()`](https://mastra.ai/reference/datasets/startExperimentAsync) to start the experiment in the background:
|
|
125
|
+
|
|
126
|
+
```typescript
|
|
127
|
+
const { experimentId, status } = await dataset.startExperimentAsync({
|
|
128
|
+
name: "large-dataset-run",
|
|
129
|
+
targetType: "agent",
|
|
130
|
+
targetId: "translation-agent",
|
|
131
|
+
scorers: ["accuracy"],
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
console.log(experimentId); // UUID
|
|
135
|
+
console.log(status); // 'pending'
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Poll for completion using [`getExperiment()`](https://mastra.ai/reference/datasets/getExperiment):
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
let experiment = await dataset.getExperiment({ experimentId });
|
|
142
|
+
|
|
143
|
+
while (experiment.status === "pending" || experiment.status === "running") {
|
|
144
|
+
await new Promise(resolve => setTimeout(resolve, 5000));
|
|
145
|
+
experiment = await dataset.getExperiment({ experimentId });
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
console.log(experiment.status); // 'completed' | 'failed'
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Configuration options
|
|
152
|
+
|
|
153
|
+
### Concurrency
|
|
154
|
+
|
|
155
|
+
Control how many items run in parallel (default: 5):
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
const summary = await dataset.startExperiment({
|
|
159
|
+
targetType: "agent",
|
|
160
|
+
targetId: "translation-agent",
|
|
161
|
+
maxConcurrency: 10,
|
|
162
|
+
});
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Timeouts and retries
|
|
166
|
+
|
|
167
|
+
Set a per-item timeout (in milliseconds) and retry count:
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
const summary = await dataset.startExperiment({
|
|
171
|
+
targetType: "agent",
|
|
172
|
+
targetId: "translation-agent",
|
|
173
|
+
itemTimeout: 30_000, // 30 seconds per item
|
|
174
|
+
maxRetries: 2, // retry failed items up to 2 times
|
|
175
|
+
});
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Retries use exponential backoff. Abort errors are never retried.
|
|
179
|
+
|
|
180
|
+
### Aborting an experiment
|
|
181
|
+
|
|
182
|
+
Pass an `AbortSignal` to cancel a running experiment:
|
|
183
|
+
|
|
184
|
+
```typescript
|
|
185
|
+
const controller = new AbortController();
|
|
186
|
+
|
|
187
|
+
// Cancel after 60 seconds
|
|
188
|
+
setTimeout(() => controller.abort(), 60_000);
|
|
189
|
+
|
|
190
|
+
const summary = await dataset.startExperiment({
|
|
191
|
+
targetType: "agent",
|
|
192
|
+
targetId: "translation-agent",
|
|
193
|
+
signal: controller.signal,
|
|
194
|
+
});
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Remaining items are marked as skipped in the summary.
|
|
198
|
+
|
|
199
|
+
### Pinning a dataset version
|
|
200
|
+
|
|
201
|
+
Run against a specific snapshot of the dataset:
|
|
202
|
+
|
|
203
|
+
```typescript
|
|
204
|
+
const summary = await dataset.startExperiment({
|
|
205
|
+
targetType: "agent",
|
|
206
|
+
targetId: "translation-agent",
|
|
207
|
+
version: 3, // use items from dataset version 3
|
|
208
|
+
});
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Viewing results
|
|
212
|
+
|
|
213
|
+
### Listing experiments
|
|
214
|
+
|
|
215
|
+
```typescript
|
|
216
|
+
const { experiments, pagination } = await dataset.listExperiments({
|
|
217
|
+
page: 0,
|
|
218
|
+
perPage: 10,
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
for (const exp of experiments) {
|
|
222
|
+
console.log(`${exp.name} — ${exp.status} (${exp.succeededCount}/${exp.totalItems})`);
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Experiment details
|
|
227
|
+
|
|
228
|
+
```typescript
|
|
229
|
+
const experiment = await dataset.getExperiment({
|
|
230
|
+
experimentId: "exp-abc-123",
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
console.log(experiment.status);
|
|
234
|
+
console.log(experiment.startedAt);
|
|
235
|
+
console.log(experiment.completedAt);
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Item-level results
|
|
239
|
+
|
|
240
|
+
```typescript
|
|
241
|
+
const { results, pagination } = await dataset.listExperimentResults({
|
|
242
|
+
experimentId: "exp-abc-123",
|
|
243
|
+
page: 0,
|
|
244
|
+
perPage: 50,
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
for (const result of results) {
|
|
248
|
+
console.log(result.itemId, result.output, result.error);
|
|
249
|
+
}
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Understanding the summary
|
|
253
|
+
|
|
254
|
+
`startExperiment()` returns an `ExperimentSummary` with counts and per-item results:
|
|
255
|
+
|
|
256
|
+
- `completedWithErrors` is `true` when the experiment finished but some items failed.
|
|
257
|
+
- Items cancelled via `signal` appear in `skippedCount`.
|
|
258
|
+
|
|
259
|
+
> **Info:** Visit the [`startExperiment` reference](https://mastra.ai/reference/datasets/startExperiment) for the full parameter and return type documentation.
|
|
260
|
+
|
|
261
|
+
## Related
|
|
262
|
+
|
|
263
|
+
- [Datasets overview](https://mastra.ai/docs/observability/datasets/overview)
|
|
264
|
+
- [Scorers overview](https://mastra.ai/docs/evals/overview)
|
|
265
|
+
- [`startExperiment` reference](https://mastra.ai/reference/datasets/startExperiment)
|
|
266
|
+
- [`listExperimentResults` reference](https://mastra.ai/reference/datasets/listExperimentResults)
|
|
@@ -7,8 +7,9 @@ The `CloudExporter` sends traces to Mastra Cloud for centralized monitoring and
|
|
|
7
7
|
### Prerequisites
|
|
8
8
|
|
|
9
9
|
1. **Mastra Cloud Account**: Sign up at [cloud.mastra.ai](https://cloud.mastra.ai)
|
|
10
|
-
2. **
|
|
11
|
-
3. **
|
|
10
|
+
2. **Mastra Cloud Project**: Create a project in Mastra Cloud. Traces are scoped per project, so even if you only want observability, you need a project to act as the destination for your traces.
|
|
11
|
+
3. **Access Token**: Generate in your project's sidebar under **Project Settings → Access Tokens**
|
|
12
|
+
4. **Environment Variables**: Set your credentials:
|
|
12
13
|
|
|
13
14
|
```bash
|
|
14
15
|
MASTRA_CLOUD_ACCESS_TOKEN=mst_xxxxxxxxxxxxxxxx
|
|
@@ -90,9 +91,9 @@ new CloudExporter({
|
|
|
90
91
|
|
|
91
92
|
1. Navigate to [cloud.mastra.ai](https://cloud.mastra.ai)
|
|
92
93
|
|
|
93
|
-
2. Select your
|
|
94
|
+
2. Select the project associated with your access token
|
|
94
95
|
|
|
95
|
-
3. Go to Observability → Traces
|
|
96
|
+
3. Go to **Observability → Traces**
|
|
96
97
|
|
|
97
98
|
4. Use filters to find specific traces:
|
|
98
99
|
|
|
@@ -101,6 +102,8 @@ new CloudExporter({
|
|
|
101
102
|
- Trace ID
|
|
102
103
|
- Error status
|
|
103
104
|
|
|
105
|
+
> **Note:** Traces are scoped to the project that issued the access token. To view traces, make sure you have selected that same project in the dashboard.
|
|
106
|
+
|
|
104
107
|
### Features
|
|
105
108
|
|
|
106
109
|
- **Trace Timeline** - Visual execution flow
|
|
@@ -479,10 +479,11 @@ When using server adapters, configuration comes from two places: the Mastra `ser
|
|
|
479
479
|
|
|
480
480
|
The adapter reads these settings from `mastra.getServer()`:
|
|
481
481
|
|
|
482
|
-
| Option | Description
|
|
483
|
-
| --------------- |
|
|
484
|
-
| `auth` | Authentication config, used by `registerAuthMiddleware()`.
|
|
485
|
-
| `bodySizeLimit` | Default body size limit in bytes. Can be overridden per-adapter via `bodyLimitOptions`.
|
|
482
|
+
| Option | Description |
|
|
483
|
+
| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
484
|
+
| `auth` | Authentication config, used by `registerAuthMiddleware()`. |
|
|
485
|
+
| `bodySizeLimit` | Default body size limit in bytes. Can be overridden per-adapter via `bodyLimitOptions`. |
|
|
486
|
+
| `onError` | Custom error handler called when an unhandled error occurs in a route handler. See [server.onError](https://mastra.ai/reference/configuration). |
|
|
486
487
|
|
|
487
488
|
### Adapter constructor only
|
|
488
489
|
|
|
@@ -1,19 +1,23 @@
|
|
|
1
|
-
# Amazon EC2
|
|
1
|
+
# Deploy Mastra to Amazon EC2
|
|
2
2
|
|
|
3
|
-
Deploy your Mastra
|
|
3
|
+
Deploy your Mastra server to Amazon EC2. This gives you full control over your server environment and supports long-running agents and workflows.
|
|
4
4
|
|
|
5
|
-
> **
|
|
5
|
+
> **Info:** This guide covers deploying the [Mastra server](https://mastra.ai/docs/server/mastra-server). If you're using a [server adapter](https://mastra.ai/docs/server/server-adapters) or [web framework](https://mastra.ai/docs/deployment/web-framework), deploy the way you normally would for that framework.
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## Before you begin
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
- An EC2 instance running Ubuntu 24+ or Amazon Linux
|
|
11
|
-
- A domain name with an A record pointing to your instance
|
|
12
|
-
- A reverse proxy configured (e.g., using [nginx](https://nginx.org/))
|
|
13
|
-
- SSL certificate configured (e.g., using [Let's Encrypt](https://letsencrypt.org/))
|
|
14
|
-
- Node.js 22.13.0 or later installed on your instance
|
|
9
|
+
You'll need:
|
|
15
10
|
|
|
16
|
-
|
|
11
|
+
- A [Mastra application](https://mastra.ai/guides/getting-started/quickstart)
|
|
12
|
+
- An [EC2](https://aws.amazon.com/ec2/) instance (Ubuntu or Amazon Linux) with Git and Node.js 22.13.0+ installed
|
|
13
|
+
|
|
14
|
+
For production, you'll also need:
|
|
15
|
+
|
|
16
|
+
- A domain name pointing to your instance (required for SSL certificates)
|
|
17
|
+
- An SSL certificate for your domain (e.g., using [Certbot](https://certbot.eff.org/) with Let's Encrypt)
|
|
18
|
+
- A reverse proxy (e.g., [nginx](https://nginx.org/)) to forward traffic to your application
|
|
19
|
+
|
|
20
|
+
## Deploy
|
|
17
21
|
|
|
18
22
|
1. Connect to your EC2 instance and clone your repository:
|
|
19
23
|
|
|
@@ -26,7 +30,7 @@ Deploy your Mastra applications to Amazon EC2 (Elastic Cloud Compute).
|
|
|
26
30
|
**Private Repository**:
|
|
27
31
|
|
|
28
32
|
```bash
|
|
29
|
-
git clone
|
|
33
|
+
git clone git@github.com:<your-username>/<your-repository>.git
|
|
30
34
|
```
|
|
31
35
|
|
|
32
36
|
Navigate to the repository directory:
|
|
@@ -67,7 +71,7 @@ Deploy your Mastra applications to Amazon EC2 (Elastic Cloud Compute).
|
|
|
67
71
|
touch .env
|
|
68
72
|
```
|
|
69
73
|
|
|
70
|
-
|
|
74
|
+
Remember to set your environment variables needed to run your application (e.g. your model provider API key):
|
|
71
75
|
|
|
72
76
|
```bash
|
|
73
77
|
OPENAI_API_KEY=<your-openai-api-key>
|
|
@@ -76,32 +80,51 @@ Deploy your Mastra applications to Amazon EC2 (Elastic Cloud Compute).
|
|
|
76
80
|
|
|
77
81
|
4. Build the application:
|
|
78
82
|
|
|
83
|
+
**npm**:
|
|
84
|
+
|
|
79
85
|
```bash
|
|
80
86
|
npm run build
|
|
81
87
|
```
|
|
82
88
|
|
|
89
|
+
**pnpm**:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pnpm run build
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Yarn**:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
yarn build
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Bun**:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
bun run build
|
|
105
|
+
```
|
|
106
|
+
|
|
83
107
|
5. Run the application:
|
|
84
108
|
|
|
85
109
|
```bash
|
|
86
110
|
node --env-file=".env" .mastra/output/index.mjs
|
|
87
111
|
```
|
|
88
112
|
|
|
89
|
-
|
|
113
|
+
This is a basic example. In production, use a process manager like [PM2](https://pm2.keymetrics.io/) or [systemd](https://systemd.io/) to keep your application running and handle restarts.
|
|
114
|
+
|
|
115
|
+
> **Warning:** Set up [authentication](https://mastra.ai/docs/server/auth) before exposing your endpoints publicly.
|
|
90
116
|
|
|
91
|
-
|
|
117
|
+
6. Your Mastra server is now running on port 4111, but it's only accessible locally.
|
|
92
118
|
|
|
93
|
-
You can
|
|
119
|
+
You can open port 4111 in your EC2 security group for direct access, or configure a reverse proxy (such as nginx) to listen on ports 80 and 443 and forward requests to `http://localhost:4111`.
|
|
94
120
|
|
|
95
|
-
|
|
121
|
+
In production, you should use a reverse proxy so you can configure HTTPS. HTTPS encrypts traffic and is required for most webhook integrations and external services your agents interact with.
|
|
96
122
|
|
|
97
|
-
|
|
98
|
-
import { MastraClient } from "@mastra/client-js";
|
|
123
|
+
7. Verify your deployment at `https://<your-ec2-address>/api/agents`, which should return a JSON list of your agents.
|
|
99
124
|
|
|
100
|
-
|
|
101
|
-
baseUrl: "https://<your-domain-name>",
|
|
102
|
-
});
|
|
103
|
-
```
|
|
125
|
+
8. You can now call your Mastra endpoints over HTTP.
|
|
104
126
|
|
|
105
127
|
## Next steps
|
|
106
128
|
|
|
107
|
-
- [Mastra Client
|
|
129
|
+
- [Connect from a Mastra Client](https://mastra.ai/docs/server/mastra-client)
|
|
130
|
+
- [Deployment overview](https://mastra.ai/docs/deployment/overview)
|