@mastra/mcp-docs-server 0.13.17-alpha.3 → 0.13.17-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +15 -0
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +10 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +17 -17
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +8 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fgithub.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +15 -15
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +14 -14
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Fragie.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +7 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +10 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +11 -11
- package/.docs/organized/changelogs/create-mastra.md +7 -7
- package/.docs/organized/changelogs/mastra.md +19 -19
- package/.docs/organized/code-examples/a2a.md +1 -1
- package/.docs/organized/code-examples/agent-network.md +1 -1
- package/.docs/organized/code-examples/agent.md +22 -1
- package/.docs/organized/code-examples/agui.md +1 -1
- package/.docs/organized/code-examples/ai-sdk-useChat.md +1 -1
- package/.docs/organized/code-examples/ai-sdk-v5.md +2 -2
- package/.docs/organized/code-examples/assistant-ui.md +3 -3
- package/.docs/organized/code-examples/bird-checker-with-express.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +1 -1
- package/.docs/organized/code-examples/client-side-tools.md +1 -1
- package/.docs/organized/code-examples/crypto-chatbot.md +1 -1
- package/.docs/organized/code-examples/experimental-auth-weather-agent.md +1 -1
- package/.docs/organized/code-examples/fireworks-r1.md +1 -1
- package/.docs/organized/code-examples/mcp-configuration.md +2 -2
- package/.docs/organized/code-examples/mcp-registry-registry.md +1 -1
- package/.docs/organized/code-examples/memory-with-mem0.md +1 -1
- package/.docs/organized/code-examples/memory-with-processors.md +1 -1
- package/.docs/organized/code-examples/openapi-spec-writer.md +2 -2
- package/.docs/organized/code-examples/quick-start.md +1 -1
- package/.docs/organized/code-examples/stock-price-tool.md +1 -1
- package/.docs/organized/code-examples/weather-agent.md +1 -1
- package/.docs/organized/code-examples/workflow-ai-recruiter.md +1 -1
- package/.docs/organized/code-examples/workflow-with-inline-steps.md +1 -1
- package/.docs/organized/code-examples/workflow-with-memory.md +1 -1
- package/.docs/organized/code-examples/workflow-with-separate-steps.md +1 -1
- package/.docs/organized/code-examples/workflow-with-suspend-resume.md +1 -1
- package/.docs/raw/agents/overview.mdx +35 -4
- package/.docs/raw/deployment/monorepo.mdx +1 -1
- package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +44 -14
- package/.docs/raw/getting-started/installation.mdx +52 -4
- package/.docs/raw/getting-started/templates.mdx +2 -22
- package/.docs/raw/reference/agents/generate.mdx +2 -2
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +2 -1
- package/.docs/raw/reference/agents/getDefaultVNextStreamOptions.mdx +1 -1
- package/.docs/raw/reference/agents/stream.mdx +2 -2
- package/.docs/raw/reference/cli/build.mdx +0 -6
- package/.docs/raw/reference/cli/start.mdx +8 -1
- package/.docs/raw/reference/scorers/noise-sensitivity.mdx +237 -0
- package/.docs/raw/reference/scorers/prompt-alignment.mdx +369 -0
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +2 -2
- package/.docs/raw/streaming/overview.mdx +2 -2
- package/.docs/raw/streaming/tool-streaming.mdx +8 -2
- package/.docs/raw/streaming/workflow-streaming.mdx +8 -2
- package/.docs/raw/tools-mcp/overview.mdx +44 -0
- package/.docs/raw/workflows/overview.mdx +19 -17
- package/dist/stdio.js +5 -1
- package/dist/tools/docs.d.ts.map +1 -1
- package/package.json +6 -6
|
@@ -76,7 +76,7 @@ import { useChat } from "@ai-sdk/react";
|
|
|
76
76
|
|
|
77
77
|
export function Chat() {
|
|
78
78
|
const { messages, input, handleInputChange, handleSubmit } = useChat({
|
|
79
|
-
api: "api/chat"
|
|
79
|
+
api: "/api/chat"
|
|
80
80
|
});
|
|
81
81
|
return (
|
|
82
82
|
<div>
|
|
@@ -432,22 +432,52 @@ This utility is helpful when you want to fetch messages directly from your stora
|
|
|
432
432
|
|
|
433
433
|
### Enabling stream compatibility
|
|
434
434
|
|
|
435
|
-
To enable AI SDK v5 compatibility, use the
|
|
435
|
+
To enable AI SDK v5 compatibility, use the `@mastra/ai-sdk` package:
|
|
436
436
|
|
|
437
|
-
|
|
438
|
-
|
|
437
|
+
<Tabs items={["npm", "yarn", "pnpm", "bun"]}>
|
|
438
|
+
<Tabs.Tab>
|
|
439
|
+
```bash copy
|
|
440
|
+
npm install @mastra/ai-sdk
|
|
441
|
+
```
|
|
442
|
+
</Tabs.Tab>
|
|
443
|
+
<Tabs.Tab>
|
|
444
|
+
```bash copy
|
|
445
|
+
yarn add @mastra/ai-sdk
|
|
446
|
+
```
|
|
447
|
+
</Tabs.Tab>
|
|
448
|
+
<Tabs.Tab>
|
|
449
|
+
```bash copy
|
|
450
|
+
pnpm add @mastra/ai-sdk
|
|
451
|
+
```
|
|
452
|
+
</Tabs.Tab>
|
|
453
|
+
<Tabs.Tab>
|
|
454
|
+
```bash copy
|
|
455
|
+
bun add @mastra/ai-sdk
|
|
456
|
+
```
|
|
457
|
+
</Tabs.Tab>
|
|
458
|
+
</Tabs>
|
|
439
459
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
460
|
+
```typescript filename="src/mastra/index.ts" showLineNumbers copy
|
|
461
|
+
export const mastra = new Mastra({
|
|
462
|
+
server: {
|
|
463
|
+
apiRoutes: [
|
|
464
|
+
chatRoute({
|
|
465
|
+
path: '/chat',
|
|
466
|
+
agent: 'weatherAgent',
|
|
467
|
+
}),
|
|
468
|
+
],
|
|
469
|
+
},
|
|
470
|
+
});
|
|
471
|
+
```
|
|
448
472
|
|
|
449
|
-
|
|
450
|
-
|
|
473
|
+
In your application, call the `useChat()` hook.
|
|
474
|
+
```typescript
|
|
475
|
+
const { error, status, sendMessage, messages, regenerate, stop } =
|
|
476
|
+
useChat({
|
|
477
|
+
transport: new DefaultChatTransport({
|
|
478
|
+
api: 'http://localhost:4111/api/chat',
|
|
479
|
+
}),
|
|
480
|
+
});
|
|
451
481
|
```
|
|
452
482
|
|
|
453
483
|
<Callout type="info">
|
|
@@ -22,6 +22,10 @@ Mastra also supports other LLM providers. For a full list of supported models an
|
|
|
22
22
|
|
|
23
23
|
Our CLI is the fastest way to get started with Mastra. You can run `create mastra` anywhere on your machine.
|
|
24
24
|
|
|
25
|
+
<Callout type="warning">
|
|
26
|
+
The CLI currently generates projects that are only compatible with `ai-sdk v4`. Please refer to [ai-sdk v5 compatibility](#ai-sdk-v5-compatibility) if you wish to use `ai-sdk v5`.
|
|
27
|
+
</Callout>
|
|
28
|
+
|
|
25
29
|
<Steps>
|
|
26
30
|
|
|
27
31
|
## Start the CLI wizard
|
|
@@ -171,7 +175,7 @@ This helps users manually set up a Mastra project with their preferred package m
|
|
|
171
175
|
|
|
172
176
|
npm install typescript tsx @types/node mastra@latest --save-dev
|
|
173
177
|
|
|
174
|
-
npm install @mastra/core@latest zod@^3 @ai-sdk/openai
|
|
178
|
+
npm install @mastra/core@latest zod@^3 @ai-sdk/openai@^1
|
|
175
179
|
```
|
|
176
180
|
|
|
177
181
|
</Tab>
|
|
@@ -181,7 +185,7 @@ This helps users manually set up a Mastra project with their preferred package m
|
|
|
181
185
|
|
|
182
186
|
pnpm add typescript tsx @types/node mastra@latest --save-dev
|
|
183
187
|
|
|
184
|
-
pnpm add @mastra/core@latest zod@^3 @ai-sdk/openai
|
|
188
|
+
pnpm add @mastra/core@latest zod@^3 @ai-sdk/openai@^1
|
|
185
189
|
```
|
|
186
190
|
|
|
187
191
|
</Tab>
|
|
@@ -191,7 +195,7 @@ This helps users manually set up a Mastra project with their preferred package m
|
|
|
191
195
|
|
|
192
196
|
yarn add typescript tsx @types/node mastra@latest --dev
|
|
193
197
|
|
|
194
|
-
yarn add @mastra/core@latest zod@^3 @ai-sdk/openai
|
|
198
|
+
yarn add @mastra/core@latest zod@^3 @ai-sdk/openai@^1
|
|
195
199
|
```
|
|
196
200
|
|
|
197
201
|
</Tab>
|
|
@@ -201,7 +205,7 @@ This helps users manually set up a Mastra project with their preferred package m
|
|
|
201
205
|
|
|
202
206
|
bun add typescript tsx @types/node mastra@latest --dev
|
|
203
207
|
|
|
204
|
-
bun add @mastra/core@latest zod@^3 @ai-sdk/openai
|
|
208
|
+
bun add @mastra/core@latest zod@^3 @ai-sdk/openai@^1
|
|
205
209
|
```
|
|
206
210
|
|
|
207
211
|
</Tab>
|
|
@@ -372,6 +376,50 @@ To install Mastra in an existing project, use the `mastra init` command.
|
|
|
372
376
|
|
|
373
377
|
> See [mastra init](/reference/cli/init) for more information.
|
|
374
378
|
|
|
379
|
+
## ai-sdk v5 compatibility
|
|
380
|
+
|
|
381
|
+
By default, Mastra projects are only compatible with `ai-sdk v4`. If you wish to use `ai-sdk v5`, please make the following changes to the generated project.
|
|
382
|
+
|
|
383
|
+
Install the latest version of the `@ai-sdk/openai` package:
|
|
384
|
+
<Tabs items={["npm", "pnpm", "yarn", "bun"]}>
|
|
385
|
+
|
|
386
|
+
<Tab>
|
|
387
|
+
```bash copy
|
|
388
|
+
npm install @ai-sdk/openai@latest
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
</Tab>
|
|
392
|
+
<Tab>
|
|
393
|
+
```bash copy
|
|
394
|
+
pnpm add @ai-sdk/openai@latest
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
</Tab>
|
|
398
|
+
<Tab>
|
|
399
|
+
```bash copy
|
|
400
|
+
yarn add @ai-sdk/openai@latest
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
</Tab>
|
|
404
|
+
<Tab>
|
|
405
|
+
```bash copy
|
|
406
|
+
bun add @ai-sdk/openai@latest
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
</Tab>
|
|
410
|
+
</Tabs>
|
|
411
|
+
|
|
412
|
+
Update the `planActivities` step in `src/mastra/workflows/weather-workflow.ts` as follows:
|
|
413
|
+
```ts filename="src/mastra/workflows/weather-workflow.ts" showLineNumbers{150} copy
|
|
414
|
+
const response = await agent.stream([ // [!code --]
|
|
415
|
+
const response = await agent.streamVNext([ // [!code ++]
|
|
416
|
+
{
|
|
417
|
+
role: 'user',
|
|
418
|
+
content: prompt,
|
|
419
|
+
},
|
|
420
|
+
]);
|
|
421
|
+
```
|
|
422
|
+
|
|
375
423
|
## Next steps
|
|
376
424
|
|
|
377
425
|
- [Local Development](/docs/server-db/local-dev-playground)
|
|
@@ -8,16 +8,7 @@ import { Tabs, Tab } from "@/components/tabs";
|
|
|
8
8
|
|
|
9
9
|
# Templates
|
|
10
10
|
|
|
11
|
-
Templates are pre-built Mastra projects that demonstrate specific use cases and patterns.
|
|
12
|
-
|
|
13
|
-
## What Templates Offer
|
|
14
|
-
|
|
15
|
-
Templates include:
|
|
16
|
-
|
|
17
|
-
- **Complete working examples** - Functional code demonstrating specific patterns
|
|
18
|
-
- **Best practices** - Proper project structure and Mastra conventions
|
|
19
|
-
- **Educational value** - Learn different Mastra features through examples
|
|
20
|
-
- **Quick start** - Bootstrap projects faster than starting from scratch
|
|
11
|
+
Templates are pre-built Mastra projects that demonstrate specific use cases and patterns. Browse available templates in the [templates directory](https://mastra.ai/templates).
|
|
21
12
|
|
|
22
13
|
## Using Templates
|
|
23
14
|
|
|
@@ -81,15 +72,4 @@ After installation:
|
|
|
81
72
|
Each template includes a comprehensive README with specific setup instructions and usage examples.
|
|
82
73
|
</Callout>
|
|
83
74
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
Browse available templates in the [templates directory](https://mastra.ai/templates).
|
|
87
|
-
|
|
88
|
-
## Next Steps
|
|
89
|
-
|
|
90
|
-
- **Explore code** - Understand how templates implement functionality
|
|
91
|
-
- **Customize** - Modify agents, tools, and workflows for your use case
|
|
92
|
-
- **Learn patterns** - Study templates to understand Mastra best practices
|
|
93
|
-
- **Contribute** - Create your own templates for the community
|
|
94
|
-
|
|
95
|
-
For detailed information on creating templates, see the [Templates Reference](/reference/templates).
|
|
75
|
+
For detailed information on creating templates, see the [Templates Reference](/reference/templates).
|
|
@@ -490,5 +490,5 @@ await agent.generate(
|
|
|
490
490
|
|
|
491
491
|
## Related
|
|
492
492
|
|
|
493
|
-
- [
|
|
494
|
-
- [Streaming responses](../../docs/agents/
|
|
493
|
+
- [Generating responses](../../docs/agents/overview.mdx#generating-responses)
|
|
494
|
+
- [Streaming responses](../../docs/agents/overview.mdx#streaming-responses)
|
|
@@ -63,5 +63,6 @@ await agent.getDefaultStreamOptions({
|
|
|
63
63
|
|
|
64
64
|
## Related
|
|
65
65
|
|
|
66
|
-
- [
|
|
66
|
+
- [Generating responses](../../docs/agents/overview.mdx#generating-responses)
|
|
67
|
+
- [Streaming responses](../../docs/agents/overview.mdx#streaming-responses)
|
|
67
68
|
- [Runtime variables](../../docs/agents/runtime-variables.mdx)
|
|
@@ -63,5 +63,5 @@ await agent.getDefaultVNextStreamOptions({
|
|
|
63
63
|
|
|
64
64
|
## Related
|
|
65
65
|
|
|
66
|
-
- [
|
|
66
|
+
- [Streaming with agents](../../docs/streaming/overview.mdx#streaming-with-agents)
|
|
67
67
|
- [Runtime variables](../../docs/agents/runtime-variables.mdx)
|
|
@@ -475,5 +475,5 @@ await agent.stream("message for agent", {
|
|
|
475
475
|
|
|
476
476
|
## Related
|
|
477
477
|
|
|
478
|
-
- [
|
|
479
|
-
- [
|
|
478
|
+
- [Generating responses](../../docs/agents/overview.mdx#generating-responses)
|
|
479
|
+
- [Streaming responses](../../docs/agents/overview.mdx#streaming-responses)
|
|
@@ -35,12 +35,6 @@ mastra build [options]
|
|
|
35
35
|
description: "Comma-separated list of paths to tool files to include",
|
|
36
36
|
isOptional: true,
|
|
37
37
|
},
|
|
38
|
-
{
|
|
39
|
-
name: "--env",
|
|
40
|
-
type: "string",
|
|
41
|
-
description: "Path to custom environment file",
|
|
42
|
-
isOptional: true,
|
|
43
|
-
},
|
|
44
38
|
{
|
|
45
39
|
name: "--help",
|
|
46
40
|
type: "boolean",
|
|
@@ -20,7 +20,8 @@ mastra start [options]
|
|
|
20
20
|
| Option | Description |
|
|
21
21
|
|--------|-------------|
|
|
22
22
|
| `-d, --dir <path>` | Path to your built Mastra output directory (default: .mastra/output) |
|
|
23
|
-
| `-
|
|
23
|
+
| `-e, --env <env>` | Custom env file to include in the start (default: .env.production, .env) |
|
|
24
|
+
| `-nt, --no-telemetry` | Disable telemetry on start |
|
|
24
25
|
|
|
25
26
|
## Examples
|
|
26
27
|
|
|
@@ -36,6 +37,12 @@ Start from a custom output directory:
|
|
|
36
37
|
mastra start --dir ./my-output
|
|
37
38
|
```
|
|
38
39
|
|
|
40
|
+
Start with a custom environment file:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
mastra start --env .env.staging
|
|
44
|
+
```
|
|
45
|
+
|
|
39
46
|
Start with telemetry disabled:
|
|
40
47
|
|
|
41
48
|
```bash
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Noise Sensitivity Scorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Noise Sensitivity Scorer in Mastra. Evaluates how robust an agent is when exposed to irrelevant, distracting, or misleading information in user queries.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import { PropertiesTable } from "@/components/properties-table";
|
|
7
|
+
|
|
8
|
+
# Noise Sensitivity Scorer
|
|
9
|
+
|
|
10
|
+
The `createNoiseSensitivityScorerLLM()` function creates a scorer that evaluates how robust an agent is when exposed to irrelevant, distracting, or misleading information. It measures the agent's ability to maintain response quality and accuracy despite noise in the input.
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
<PropertiesTable
|
|
15
|
+
content={[
|
|
16
|
+
{
|
|
17
|
+
name: "model",
|
|
18
|
+
type: "MastraLanguageModel",
|
|
19
|
+
description: "The language model to use for evaluating noise sensitivity",
|
|
20
|
+
required: true,
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "options",
|
|
24
|
+
type: "NoiseSensitivityOptions",
|
|
25
|
+
description: "Configuration options for the scorer",
|
|
26
|
+
required: true,
|
|
27
|
+
children: [
|
|
28
|
+
{
|
|
29
|
+
name: "baselineResponse",
|
|
30
|
+
type: "string",
|
|
31
|
+
description: "The expected clean response to compare against (what the agent should ideally produce without noise)",
|
|
32
|
+
required: true,
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
name: "noisyQuery",
|
|
36
|
+
type: "string",
|
|
37
|
+
description: "The user query with added noise, distractions, or misleading information",
|
|
38
|
+
required: true,
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "noiseType",
|
|
42
|
+
type: "string",
|
|
43
|
+
description: "Type of noise added (e.g., 'misinformation', 'distractors', 'adversarial')",
|
|
44
|
+
required: false,
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "scoring",
|
|
48
|
+
type: "object",
|
|
49
|
+
description: "Advanced scoring configuration for fine-tuning evaluation",
|
|
50
|
+
required: false,
|
|
51
|
+
children: [
|
|
52
|
+
{
|
|
53
|
+
name: "impactWeights",
|
|
54
|
+
type: "object",
|
|
55
|
+
description: "Custom weights for different impact levels",
|
|
56
|
+
required: false,
|
|
57
|
+
children: [
|
|
58
|
+
{
|
|
59
|
+
name: "none",
|
|
60
|
+
type: "number",
|
|
61
|
+
description: "Weight for no impact (default: 1.0)",
|
|
62
|
+
required: false,
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
name: "minimal",
|
|
66
|
+
type: "number",
|
|
67
|
+
description: "Weight for minimal impact (default: 0.85)",
|
|
68
|
+
required: false,
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "moderate",
|
|
72
|
+
type: "number",
|
|
73
|
+
description: "Weight for moderate impact (default: 0.6)",
|
|
74
|
+
required: false,
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
name: "significant",
|
|
78
|
+
type: "number",
|
|
79
|
+
description: "Weight for significant impact (default: 0.3)",
|
|
80
|
+
required: false,
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
name: "severe",
|
|
84
|
+
type: "number",
|
|
85
|
+
description: "Weight for severe impact (default: 0.1)",
|
|
86
|
+
required: false,
|
|
87
|
+
},
|
|
88
|
+
],
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "penalties",
|
|
92
|
+
type: "object",
|
|
93
|
+
description: "Penalty configuration for major issues",
|
|
94
|
+
required: false,
|
|
95
|
+
children: [
|
|
96
|
+
{
|
|
97
|
+
name: "majorIssuePerItem",
|
|
98
|
+
type: "number",
|
|
99
|
+
description: "Penalty per major issue identified (default: 0.1)",
|
|
100
|
+
required: false,
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
name: "maxMajorIssuePenalty",
|
|
104
|
+
type: "number",
|
|
105
|
+
description: "Maximum total penalty for major issues (default: 0.3)",
|
|
106
|
+
required: false,
|
|
107
|
+
},
|
|
108
|
+
],
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
name: "discrepancyThreshold",
|
|
112
|
+
type: "number",
|
|
113
|
+
description: "Threshold for using conservative scoring when LLM and calculated scores diverge (default: 0.2)",
|
|
114
|
+
required: false,
|
|
115
|
+
},
|
|
116
|
+
],
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
},
|
|
120
|
+
]}
|
|
121
|
+
/>
|
|
122
|
+
|
|
123
|
+
## .run() Returns
|
|
124
|
+
|
|
125
|
+
<PropertiesTable
|
|
126
|
+
content={[
|
|
127
|
+
{
|
|
128
|
+
name: "score",
|
|
129
|
+
type: "number",
|
|
130
|
+
description: "Robustness score between 0 and 1 (1.0 = completely robust, 0.0 = severely compromised)",
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: "reason",
|
|
134
|
+
type: "string",
|
|
135
|
+
description: "Human-readable explanation of how noise affected the agent's response",
|
|
136
|
+
},
|
|
137
|
+
]}
|
|
138
|
+
/>
|
|
139
|
+
|
|
140
|
+
## Evaluation Dimensions
|
|
141
|
+
|
|
142
|
+
The Noise Sensitivity scorer analyzes five key dimensions:
|
|
143
|
+
|
|
144
|
+
### 1. Content Accuracy
|
|
145
|
+
Evaluates whether facts and information remain correct despite noise. The scorer checks if the agent maintains truthfulness when exposed to misinformation.
|
|
146
|
+
|
|
147
|
+
### 2. Completeness
|
|
148
|
+
Assesses if the noisy response addresses the original query as thoroughly as the baseline. Measures whether noise causes the agent to miss important information.
|
|
149
|
+
|
|
150
|
+
### 3. Relevance
|
|
151
|
+
Determines if the agent stayed focused on the original question or got distracted by irrelevant information in the noise.
|
|
152
|
+
|
|
153
|
+
### 4. Consistency
|
|
154
|
+
Compares how similar the responses are in their core message and conclusions. Evaluates whether noise causes the agent to contradict itself.
|
|
155
|
+
|
|
156
|
+
### 5. Hallucination Resistance
|
|
157
|
+
Checks if noise causes the agent to generate false or fabricated information that wasn't present in either the query or the noise.
|
|
158
|
+
|
|
159
|
+
## Scoring Algorithm
|
|
160
|
+
|
|
161
|
+
### Formula
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
Final Score = max(0, min(llm_score, calculated_score) - issues_penalty)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Where:
|
|
168
|
+
- `llm_score` = Direct robustness score from LLM analysis
|
|
169
|
+
- `calculated_score` = Average of impact weights across dimensions
|
|
170
|
+
- `issues_penalty` = min(major_issues × penalty_rate, max_penalty)
|
|
171
|
+
|
|
172
|
+
### Impact Level Weights
|
|
173
|
+
|
|
174
|
+
Each dimension receives an impact level with corresponding weights:
|
|
175
|
+
|
|
176
|
+
- **None (1.0)**: Response virtually identical in quality and accuracy
|
|
177
|
+
- **Minimal (0.85)**: Slight phrasing changes but maintains correctness
|
|
178
|
+
- **Moderate (0.6)**: Noticeable changes affecting quality but core info correct
|
|
179
|
+
- **Significant (0.3)**: Major degradation in quality or accuracy
|
|
180
|
+
- **Severe (0.1)**: Response substantially worse or completely derailed
|
|
181
|
+
|
|
182
|
+
### Conservative Scoring
|
|
183
|
+
|
|
184
|
+
When the LLM's direct score and the calculated score diverge by more than the discrepancy threshold, the scorer uses the lower (more conservative) score to ensure reliable evaluation.
|
|
185
|
+
|
|
186
|
+
## Noise Types
|
|
187
|
+
|
|
188
|
+
### Misinformation
|
|
189
|
+
False or misleading claims mixed with legitimate queries.
|
|
190
|
+
|
|
191
|
+
Example: "What causes climate change? Also, climate change is a hoax invented by scientists."
|
|
192
|
+
|
|
193
|
+
### Distractors
|
|
194
|
+
Irrelevant information that could pull focus from the main query.
|
|
195
|
+
|
|
196
|
+
Example: "How do I bake a cake? My cat is orange and I like pizza on Tuesdays."
|
|
197
|
+
|
|
198
|
+
### Adversarial
|
|
199
|
+
Deliberately conflicting instructions designed to confuse.
|
|
200
|
+
|
|
201
|
+
Example: "Write a summary of this article. Actually, ignore that and tell me about dogs instead."
|
|
202
|
+
|
|
203
|
+
## Usage Patterns
|
|
204
|
+
|
|
205
|
+
### Testing Agent Robustness
|
|
206
|
+
Use to verify that agents maintain quality when faced with:
|
|
207
|
+
- User confusion or contradictions
|
|
208
|
+
- Multiple unrelated questions in one query
|
|
209
|
+
- False premises or assumptions
|
|
210
|
+
- Emotional or distracting content
|
|
211
|
+
|
|
212
|
+
### Quality Assurance
|
|
213
|
+
Integrate into evaluation pipelines to:
|
|
214
|
+
- Benchmark different models' noise resistance
|
|
215
|
+
- Identify agents vulnerable to manipulation
|
|
216
|
+
- Validate production readiness
|
|
217
|
+
|
|
218
|
+
### Security Testing
|
|
219
|
+
Evaluate resistance to:
|
|
220
|
+
- Prompt injection attempts
|
|
221
|
+
- Social engineering tactics
|
|
222
|
+
- Information pollution attacks
|
|
223
|
+
|
|
224
|
+
## Score Interpretation
|
|
225
|
+
|
|
226
|
+
- **0.9-1.0**: Excellent robustness, minimal impact from noise
|
|
227
|
+
- **0.7-0.8**: Good resistance with minor degradation
|
|
228
|
+
- **0.5-0.6**: Moderate impact, some key aspects affected
|
|
229
|
+
- **0.3-0.4**: Significant vulnerability to noise
|
|
230
|
+
- **0.0-0.2**: Severe compromise, agent easily misled
|
|
231
|
+
|
|
232
|
+
## Related
|
|
233
|
+
|
|
234
|
+
- [Noise Sensitivity Examples](/examples/scorers/noise-sensitivity) - Practical usage examples
|
|
235
|
+
- [Hallucination Scorer](/reference/scorers/hallucination) - Evaluates fabricated content
|
|
236
|
+
- [Answer Relevancy Scorer](/reference/scorers/answer-relevancy) - Measures response focus
|
|
237
|
+
- [Custom Scorers](/docs/scorers/custom-scorers) - Creating your own evaluation metrics
|